18700: Merge branch 'main'
authorTom Clegg <tom@curii.com>
Tue, 12 Apr 2022 15:54:26 +0000 (11:54 -0400)
committerTom Clegg <tom@curii.com>
Tue, 12 Apr 2022 15:54:26 +0000 (11:54 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

73 files changed:
.licenseignore
AUTHORS
CITATION.cff [new file with mode: 0644]
apps/workbench/app/controllers/application_controller.rb
apps/workbench/config/arvados_config.rb
build/run-build-packages.sh
build/run-library.sh
build/run-tests.sh
cmd/arvados-server/cmd.go
doc/_includes/_container_scheduling_parameters.liquid
doc/admin/spot-instances.html.textile.liquid
doc/admin/upgrading.html.textile.liquid
doc/api/methods/collections.html.textile.liquid
doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/user/cwl/cwl-extensions.html.textile.liquid
lib/boot/supervisor.go
lib/config/config.default.yml
lib/config/export.go
lib/config/load.go
lib/config/load_test.go
lib/controller/federation/conn.go
lib/controller/handler_test.go
lib/controller/integration_test.go
lib/controller/router/router.go
lib/service/cmd.go
sdk/R/DESCRIPTION
sdk/cwl/test_with_arvbox.sh
sdk/go/arvados/client.go
sdk/go/arvados/client_test.go
sdk/go/arvados/config.go
sdk/go/httpserver/logger.go
sdk/python/arvados/arvfile.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/get.py
sdk/python/arvados/keep.py
sdk/python/arvados/util.py
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_collections.py
sdk/python/tests/test_util.py
services/api/app/controllers/arvados/v1/links_controller.rb
services/api/app/models/api_client_authorization.rb
services/api/app/models/arvados_model.rb
services/api/db/migrate/20220401153101_fix_created_at_indexes.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/pipeline_instances.yml
services/api/test/functional/arvados/v1/query_test.rb
services/api/test/integration/permissions_test.rb
services/api/test/integration/select_test.rb
services/api/test/unit/container_request_test.rb
services/fuse/arvados_fuse/fusedir.py
services/fuse/tests/test_mount.py
services/keep-web/handler.go
services/keepproxy/.gitignore [deleted file]
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy.service [deleted file]
services/keepproxy/keepproxy_test.go
services/keepproxy/proxy_client.go
tools/arvbox/bin/arvbox
tools/arvbox/lib/arvbox/docker/service/keepproxy/run-service
tools/salt-install/Vagrantfile
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/arvados_development.sls [new file with mode: 0644]
tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls [new file with mode: 0644]
tools/salt-install/config_examples/multi_host/aws/states/shell_sudo_passwordless.sls [new file with mode: 0644]
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/arvados.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/arvados.sls
tools/salt-install/config_examples/single_host/single_hostname/states/shell_cron_add_login_sync.sls [new file with mode: 0644]
tools/salt-install/config_examples/single_host/single_hostname/states/shell_sudo_passwordless.sls [new file with mode: 0644]
tools/salt-install/provision.sh
tools/salt-install/tests/run-test.sh
tools/user-activity/arvados_user_activity/main.py

index 97ce38af93b89f2e69a60a152381574591a7d1c8..d13eee39015963c6d33ec6e48e6dbb52298258a8 100644 (file)
@@ -88,4 +88,5 @@ sdk/python/tests/fed-migrate/*.cwlex
 doc/install/*.xlsx
 sdk/cwl/tests/wf/hello.txt
 sdk/cwl/tests/wf/indir1/hello2.txt
-sdk/cwl/tests/chipseq/data/Genomes/*
\ No newline at end of file
+sdk/cwl/tests/chipseq/data/Genomes/*
+CITATION.cff
diff --git a/AUTHORS b/AUTHORS
index b8b75518ff08ad0f0d787f27f52ba8f58f2bece2..fa9fa86d34efe9aae853ca24d717974eb8513edc 100644 (file)
--- a/AUTHORS
+++ b/AUTHORS
@@ -18,7 +18,7 @@ President and Fellows of Harvard College <*@harvard.edu>
 Thomas Mooney <tmooney@genome.wustl.edu>
 Chen Chen <aflyhorse@gmail.com>
 Veritas Genetics, Inc. <*@veritasgenetics.com>
-Curii Corporation, Inc. <*@curii.com>
+Curii Corporation <*@curii.com>
 Dante Tsang <dante@dantetsang.com>
 Codex Genetics Ltd <info@codexgenetics.com>
 Bruno P. Kinoshita <brunodepaulak@yahoo.com.br>
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644 (file)
index 0000000..df3b35d
--- /dev/null
@@ -0,0 +1,37 @@
+cff-version: 1.2.0
+message: "If you use this software, please cite it as below."
+authors:
+- name: "The Arvados Authors"
+- family-names: "Amstutz"
+  given-names: "Peter"
+  orcid: "https://orcid.org/0000-0003-3566-7705"
+- family-names: "Bértoli"
+  given-names: "Javier"
+- family-names: "César"
+  given-names: "Nico"
+- family-names: "Clegg"
+  given-names: "Tom"
+  orcid: "https://orcid.org/0000-0001-6751-2930"
+- family-names: "Di Pentima"
+  given-names: "Lucas"
+  orcid: "https://orcid.org/0000-0002-2807-6854"
+- family-names: "Kutyła"
+  given-names: "Daniel"
+- family-names: "Li"
+  given-names: "Jiayong"
+- family-names: "Smith"
+  given-names: "Stephen"
+- family-names: "Vandewege"
+  given-names: "Ward"
+  orcid: "https://orcid.org/0000-0002-2527-6949"
+- family-names: "Wait Zaranek"
+  given-names: "Alexander"
+  orcid: "https://orcid.org/0000-0002-0415-9655"
+- family-names: "Wait Zaranek"
+  given-names: "Sarah"
+  orcid: "https://orcid.org/0000-0003-4716-9121"
+title: "Arvados"
+abstract: "Arvados is an open source platform for managing, processing, and sharing genomic and other large scientific and biomedical data."
+type: software
+url: "https://github.com/arvados/arvados/"
+doi: 10.5281/zenodo.6382942
index 5312e733f41eb992131752ccf782e112b8c5af2e..f8c8079a1edf4f5878c456d8f4ea0893e2574a32 100644 (file)
@@ -172,7 +172,7 @@ class ApplicationController < ActionController::Base
 
   def find_objects_for_index
     @objects ||= model_class
-    @objects = @objects.filter(@filters).limit(@limit).offset(@offset)
+    @objects = @objects.filter(@filters).limit(@limit).offset(@offset).order(@order)
     @objects.fetch_multiple_pages(false)
   end
 
index c5cc544b9b8717fc70b51c52c1c4b35ce03a16aa..7cc46d2983490896c36e96c9081bf3e74013efa2 100644 (file)
@@ -199,4 +199,8 @@ ArvadosWorkbench::Application.configure do
     ConfigValidators.validate_wb2_url_config()
     ConfigValidators.validate_download_config()
   end
+  if Rails.configuration.Users.AnonymousUserToken and
+     !Rails.configuration.Users.AnonymousUserToken.starts_with?("v2/")
+    Rails.configuration.Users.AnonymousUserToken = "v2/#{clusterID}-gj3su-anonymouspublic/#{Rails.configuration.Users.AnonymousUserToken}"
+  end
 end
index 23cf81bd70246a473d12751ba99ee028ec5ec6a3..164755fda6a42feeedfc202e8317f94b63e6f8b8 100755 (executable)
@@ -255,7 +255,7 @@ package_go_binary services/health arvados-health "$FORMAT" "$ARCH" \
     "Check health of all Arvados cluster services"
 package_go_binary services/keep-balance keep-balance "$FORMAT" "$ARCH" \
     "Rebalance and garbage-collect data blocks stored in Arvados Keep"
-package_go_binary services/keepproxy keepproxy "$FORMAT" "$ARCH" \
+package_go_binary cmd/arvados-server keepproxy "$FORMAT" "$ARCH" \
     "Make a Keep cluster accessible to clients that are not on the LAN"
 package_go_binary cmd/arvados-server keepstore "$FORMAT" "$ARCH" \
     "Keep storage daemon, accessible to clients on the LAN"
index fa2be6ac7a613c88d0083927cc9ad08137af0ffb..47c5e2a39a22f98cc0ad18631e814ea4f4285c63 100755 (executable)
@@ -698,12 +698,15 @@ handle_arvados_src () {
 
 # Usage: handle_libarvados_perl
 handle_libarvados_perl () {
-  if [[ -n "$ONLY_BUILD" ]] || [[ "$ONLY_BUILD" != "libarvados-perl" ]] ; then
+  if [[ -n "$ONLY_BUILD" ]] && [[ "$ONLY_BUILD" != "libarvados-perl" ]] ; then
     debug_echo -e "Skipping build of libarvados-perl package."
     return 0
   fi
-  cd "$WORKSPACE/sdk/perl"
+  # The perl sdk subdirectory is so old that it has no tag in its history,
+  # which causes version_at_commit.sh to fail. Just rebuild it every time.
+  cd "$WORKSPACE"
   libarvados_perl_version="$(version_from_git)"
+  cd "$WORKSPACE/sdk/perl"
 
   cd $WORKSPACE/packages/$TARGET
   test_package_presence libarvados-perl "$libarvados_perl_version"
@@ -721,7 +724,7 @@ handle_libarvados_perl () {
     perl Makefile.PL INSTALL_BASE=install >"$STDOUT_IF_DEBUG" && \
         make install INSTALLDIRS=perl >"$STDOUT_IF_DEBUG" && \
         fpm_build "$WORKSPACE/sdk/perl" install/lib/=/usr/share libarvados-perl \
-        dir "$(version_from_git)" install/man/=/usr/share/man \
+        dir "$libarvados_perl_version" install/man/=/usr/share/man \
         "$WORKSPACE/apache-2.0.txt=/usr/share/doc/libarvados-perl/apache-2.0.txt" && \
         mv --no-clobber libarvados-perl*.$FORMAT "$WORKSPACE/packages/$TARGET/"
   fi
index 3592efbdc2bee5f6c4e25251677c581dcf65c088..67c54c98b0244a72842bccfbb964ce3dbd164c0c 100755 (executable)
@@ -556,6 +556,12 @@ setup_ruby_environment() {
             done
             "$bundle" version | tee /dev/stderr | grep -q 'version 2'
         ) || fatal 'install bundler'
+       if test -d /var/lib/arvados-arvbox/ ; then
+           # Inside arvbox, use bundler-installed binstubs.  The
+           # system bundler and rail's own bin/bundle refuse to work.
+           # I don't know why.
+           bundle=binstubs/bundle
+       fi
     fi
 }
 
index c5465ee5615c3ca0a778f48603c66bb788a4b6be..26c3f485ea348bb723f04e21afd4a92816fedd47 100644 (file)
@@ -21,6 +21,7 @@ import (
        "git.arvados.org/arvados.git/lib/install"
        "git.arvados.org/arvados.git/lib/lsf"
        "git.arvados.org/arvados.git/lib/recovercollection"
+       "git.arvados.org/arvados.git/services/keepproxy"
        "git.arvados.org/arvados.git/services/keepstore"
        "git.arvados.org/arvados.git/services/ws"
 )
@@ -42,6 +43,7 @@ var (
                "dispatch-lsf":       lsf.DispatchCommand,
                "install":            install.Command,
                "init":               install.InitCommand,
+               "keepproxy":          keepproxy.Command,
                "keepstore":          keepstore.Command,
                "recover-collection": recovercollection.Command,
                "workbench2":         wb2command{},
index be046173ad028b92fd9b26a4a2994f5b4c9fd295..636b6df59c455d5badac1bda9ded03434403207d 100644 (file)
@@ -11,5 +11,5 @@ Parameters to be passed to the container scheduler (e.g., Slurm) when running a
 table(table table-bordered table-condensed).
 |_. Key|_. Type|_. Description|_. Notes|
 |partitions|array of strings|The names of one or more compute partitions that may run this container. If not provided, the system will choose where to run the container.|Optional.|
-|preemptible|boolean|If true, the dispatcher will ask for a preemptible cloud node instance (eg: AWS Spot Instance) to run this container.|Optional. Default is false.|
+|preemptible|boolean|If true, the dispatcher should use a preemptible cloud node instance (e.g., an AWS Spot Instance) to run this container.  Whether a preemptible instance is actually used "depends on cluster configuration.":{{site.baseurl}}/admin/spot-instances.html|Optional. Default is false.|
 |max_run_time|integer|Maximum running time (in seconds) that this container will be allowed to run before being cancelled.|Optional. Default is 0 (no limit).|
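
As an illustration of the @preemptible@ flag above, here is a minimal Go sketch that sets it on a new container request via the SDK's @RequestAndDecode@ helper, the same pattern used elsewhere in this merge. The image name and the fields other than @scheduling_parameters@ are placeholders, and the usual ARVADOS_* environment is assumed:

<pre>
package main

import (
	"fmt"
	"log"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	client := arvados.NewClientFromEnv()
	var cr arvados.ContainerRequest
	err := client.RequestAndDecode(&cr, "POST", "/arvados/v1/container_requests", nil,
		map[string]interface{}{
			"container_request": map[string]interface{}{
				"name":            "preemptible example",
				"state":           "Uncommitted",
				"container_image": "arvados/jobs", // placeholder image
				"command":         []string{"true"},
				"output_path":     "/out",
				// The scheduling parameter documented above:
				"scheduling_parameters": map[string]interface{}{
					"preemptible": true,
				},
			},
		})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cr.UUID)
}
</pre>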
index 3837f30d6da99519b8ba9017e28267a1f0e8742d..703e70fb8636afe4d99b2b356bc4a4326d0d46e7 100644 (file)
@@ -16,31 +16,48 @@ Currently Arvados supports preemptible instances using AWS and Azure spot instan
 
 h2. Configuration
 
-Add entries to @InstanceTypes@ that have @Preemptible: true@.  Typically you want to add both preemptible and non-preemptible entries for each cloud provider VM type.  The @Price@ for preemptible instances is the maximum bid price, the actual price paid is dynamic and will likely be lower.  For example:
+First, configure some @InstanceTypes@ that have @Preemptible: true@. For a preemptible instance, @Price@ determines the maximum bid price; the actual price paid is dynamic and will likely be lower.
+
+Typically you want to add both preemptible and non-preemptible entries for each cloud provider VM type. To do this automatically, use @PreemptiblePriceFactor@ to enable a preemptible version of each listed type, using the given factor to set the maximum bid price relative to the non-preemptible price. Alternatively, you can configure preemptible instance types explicitly. For example, the following two configurations are equivalent:
 
 <pre>
 Clusters:
   ClusterID:
+    Containers:
+      PreemptiblePriceFactor: 0.8
     InstanceTypes:
       m4.large:
-        Preemptible: false
         ProviderType: m4.large
         VCPUs: 2
         RAM: 8GiB
         AddedScratch: 32GB
         Price: 0.1
-      m4.large.spot:
-        Preemptible: true
+</pre>
+
+<pre>
+Clusters:
+  ClusterID:
+    InstanceTypes:
+      m4.large:
         ProviderType: m4.large
         VCPUs: 2
         RAM: 8GiB
         AddedScratch: 32GB
         Price: 0.1
+      m4.large.preemptible:
+        Preemptible: true
+        ProviderType: m4.large
+        VCPUs: 2
+        RAM: 8GiB
+        AddedScratch: 32GB
+        Price: 0.08
 </pre>
 
 Next, you can choose to enable automatic use of preemptible instances:
 
 <pre>
+Clusters:
+  ClusterID:
     Containers:
       AlwaysUsePreemptibleInstances: true
 </pre>
index abaa190c8c781e26529fe795be0c06a303a8ace0..226c8667d67a710283c436db321dc1f26246593d 100644 (file)
@@ -28,9 +28,17 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
-h2(#main). development main (as of 2022-03-08)
+h2(#main). development main (as of 2022-04-08)
 
-"previous: Upgrading to 2.3.0":#v2_3_0
+"previous: Upgrading to 2.4.0":#v2_4_0
+
+h2(#v2_4_0). v2.4.0 (2022-04-08)
+
+"previous: Upgrading to 2.3.1":#v2_3_1
+
+h3. Default result order changed
+
+When requesting a list of objects without an explicit @order@ parameter, the default order has changed from @modified_at desc, uuid asc@ to @modified_at desc, uuid desc@.  This means that if two objects have identical @modified_at@ timestamps, the tiebreaker will now be based on @uuid@ in descending order, where previously it was ascending order. The practical effect of this should be minor: with microsecond precision it is unusual for two records to have exactly the same timestamp, and order-sensitive queries should already provide an explicit @order@ parameter.
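
To make that advice concrete, a minimal Go sketch of a list request with an explicit @order@ parameter, assuming the standard ARVADOS_* environment:

<pre>
package main

import (
	"fmt"
	"log"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	client := arvados.NewClientFromEnv()
	var list arvados.CollectionList
	// An explicit order makes results stable even when two records
	// share a modified_at timestamp.
	err := client.RequestAndDecode(&list, "GET", "/arvados/v1/collections", nil,
		map[string]interface{}{
			"order": []string{"modified_at desc", "uuid asc"},
			"limit": 10,
		})
	if err != nil {
		log.Fatal(err)
	}
	for _, coll := range list.Items {
		fmt.Println(coll.ModifiedAt, coll.UUID)
	}
}
</pre>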
 
 h3. Ubuntu 18.04 Arvados Python packages now depend on python-3.8
 
@@ -44,27 +52,35 @@ The minimum supported Ruby version is now 2.6.  If you are running Arvados on De
 
 h3. Anonymous token changes
 
-The anonymous token configured in @Users.AnonymousUserToken@ must now be 32 characters or longer. This was already the suggestion in the documentation, now it is enforced. The @script/get_anonymous_user_token.rb@ script that was needed to register the anonymous user token in the database has been removed. Registration of the anonymous token is no longer necessary. If the anonymous token in @config.yml@ is specified as a full V2 token, that will now generate a warning - it should be updated to list just the secret (i.e. the part after the last forward slash).
-
-h3. Preemptible instance types are used automatically, if any are configured
+The anonymous token configured in @Users.AnonymousUserToken@ must now be 32 characters or longer. This was already the suggestion in the documentation; now it is enforced. The @script/get_anonymous_user_token.rb@ script that was needed to register the anonymous user token in the database has been removed. Registration of the anonymous token is no longer necessary.
 
-The default behavior for selecting "preemptible instances":{{site.baseurl}}/admin/spot-instances.html has changed. If your configuration lists any instance types with @Preemptible: true@, all child (non-top-level) containers will automatically be scheduled on preemptible instances. To avoid using preemptible instances except when explicitly requested by clients, add @AlwaysUsePreemptibleInstances: false@ in the @Containers@ config section. (Previously, preemptible instance types were never used unless the configuration specified @UsePreemptibleInstances: true@. That flag has been removed.)
+h3. Preemptible instance support changes
 
-h3. Role groups are visible to all users by default
+The @Containers.UsePreemptibleInstances@ option has been renamed to @Containers.AlwaysUsePreemptibleInstances@ and has the same behavior when @true@ and one or more preemptible instances are configured.  However, a value of @false@ no longer disables support for preemptible instances; instead, users can now enable use of preemptible instances at the level of an individual workflow or workflow step.
 
-The permission model has changed such that all role groups are visible to all active users. This enables users to share objects with groups they don't belong to. To preserve the previous behavior, where role groups are only visible to members and admins, add @RoleGroupsVisibleToAll: false@ to the @Users@ section of your configuration file.
+In addition, there is a new configuration option, @Containers.PreemptiblePriceFactor@, which automatically adds a preemptible instance type corresponding to each regular instance type.  See "Using Preemptible instances":spot-instances.html for details.
 
 h3. Default LSF arguments have changed
 
 If you use LSF and your configuration specifies @Containers.LSF.BsubArgumentsList@, you should update it to include the new arguments (@"-R", "select[mem>=%MMB]", ...@, see "configuration reference":{{site.baseurl}}/admin/config.html). Otherwise, containers that are too big to run on any LSF host will remain in the LSF queue instead of being cancelled.
 
-h3. Previously trashed role groups will be deleted
+h3. Support for NVIDIA CUDA GPUs
 
-Due to a bug in previous versions, the @DELETE@ operation on a role group caused the group to be flagged as trash in the database, but continue to grant permissions regardless. After upgrading, any role groups that had been trashed this way will be deleted. This might surprise some users if they were relying on permissions that were still in effect due to this bug. Future @DELETE@ operations on a role group will immediately delete the group and revoke the associated permissions.
+Arvados now supports requesting NVIDIA CUDA GPUs for the cloud and LSF dispatchers (Slurm is currently not supported).  To be able to request GPU nodes, some additional configuration is needed:
 
-h3. Users are visible to other users by default
+"Including GPU support in cloud compute node image":{{site.baseurl}}/install/crunch2-cloud/install-compute-node.html#nvidia
 
-When a new user is set up (either via @AutoSetupNewUsers@ config or via Workbench admin interface) the user immediately becomes visible to other users. To revert to the previous behavior, where the administrator must add two users to the same group using the Workbench admin interface in order for the users to see each other, change the new @Users.ActivatedUsersAreVisibleToOthers@ config to @false@.
+"Configure cloud dispatcher for GPU support":{{site.baseurl}}/install/crunch2-cloud/install-dispatch-cloud.html#GPUsupport
+
+"LSF GPU configuration":{{site.baseurl}}/install/crunch2-lsf/install-dispatch.html
+
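As a rough illustration (not part of this commit), a Go sketch of a container request that asks for a GPU. The @runtime_constraints.cuda@ keys used here (@driver_version@, @hardware_capability@, @device_count@) are assumptions based on the container API and should be checked against your cluster's version:

<pre>
package main

import (
	"fmt"
	"log"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	client := arvados.NewClientFromEnv()
	var cr arvados.ContainerRequest
	// Assumed CUDA constraint keys; verify against your API version.
	err := client.RequestAndDecode(&cr, "POST", "/arvados/v1/container_requests", nil,
		map[string]interface{}{
			"container_request": map[string]interface{}{
				"name":            "cuda example",
				"state":           "Uncommitted",
				"container_image": "arvados/jobs", // placeholder image
				"command":         []string{"nvidia-smi"},
				"output_path":     "/out",
				"runtime_constraints": map[string]interface{}{
					"vcpus": 1,
					"ram":   1 << 30,
					"cuda": map[string]interface{}{
						"device_count":        1,
						"driver_version":      "11.0",
						"hardware_capability": "9.0",
					},
				},
			},
		})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(cr.UUID)
}
</pre>
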
+h3. Role groups are visible to all users by default
+
+The permission model has changed such that all role groups are visible to all active users. This enables users to share objects with groups they don't belong to. To preserve the previous behavior, where role groups are only visible to members and admins, add @RoleGroupsVisibleToAll: false@ to the @Users@ section of your configuration file.
+
+h3. Previously trashed role groups will be deleted
+
+Due to a bug in previous versions, the @DELETE@ operation on a role group caused the group to be flagged as trash in the database, but continue to grant permissions regardless. After upgrading, any role groups that had been trashed this way will be deleted. This might surprise some users if they were relying on permissions that were still in effect due to this bug. Future @DELETE@ operations on a role group will immediately delete the group and revoke the associated permissions.
 
 h3. Dedicated keepstore process for each container
 
@@ -73,6 +89,14 @@ When Arvados runs a container via @arvados-dispatch-cloud@, the @crunch-run@ sup
 * If you already have a robust permanent keepstore infrastructure, you can set @Containers.LocalKeepBlobBuffersPerVCPU@ to 0 to disable this feature and preserve the previous behavior of sending container I/O traffic to your separately provisioned keepstore servers.
 * This feature is enabled only if no volumes use @AccessViaHosts@, and no volumes have underlying @Replication@ less than @Collections.DefaultReplication@. If the feature is configured but cannot be enabled due to an incompatible volume configuration, this will be noted in the @crunch-run.txt@ file in the container log.
 
+h2(#v2_3_1). v2.3.1 (2021-11-24)
+
+"previous: Upgrading to 2.3.0":#v2_3_0
+
+h3. Users are visible to other users by default
+
+When a new user is set up (either via @AutoSetupNewUsers@ config or via Workbench admin interface) the user immediately becomes visible to other users. To revert to the previous behavior, where the administrator must add two users to the same group using the Workbench admin interface in order for the users to see each other, change the new @Users.ActivatedUsersAreVisibleToOthers@ config to @false@.
+
 h3. Backend support for vocabulary checking
 
 If your installation uses the vocabulary feature on Workbench2, you will need to update the cluster configuration by moving the vocabulary definition file to the node where @controller@ runs, and set the @API.VocabularyPath@ configuration parameter to the local path where the file was placed.
index 5ff8d529f8376ceb8b5372f8a94873e921c0961e..a2a6a77e19f6350c1be3c770a1560f42c9cc6402 100644 (file)
@@ -109,6 +109,36 @@ table(table table-bordered table-condensed).
 
 Note: Because adding access tokens to manifests can be computationally expensive, the @manifest_text@ field is not included in results by default.  If you need it, pass a @select@ parameter that includes @manifest_text@.
 
+h4. Searching Collections for names of files or directories
+
+You can search collections for specific file or directory names (whole or part) using the following filter in a @list@ query.
+
+<pre>
+filters: [["file_names", "ilike", "%sample1234.fastq%"]]
+</pre>
+
+Note: @file_names@ is a hidden field used for indexing.  It is not returned by any API call.  On the client, you can programmatically enumerate all the files in a collection using @arv-ls@, the Python SDK @Collection@ class, the Go SDK @FileSystem@ struct, the WebDAV API, or the S3-compatible API.
+
+As of this writing (Arvados 2.4), you can also search for directory paths, but _not_ complete file paths.
+
+In other words, this will work (when @dir3@ is a directory):
+
+<pre>
+filters: [["file_names", "ilike", "%dir1/dir2/dir3%"]]
+</pre>
+
+However, this will _not_ return the desired results (where @sample1234.fastq@ is a file):
+
+<pre>
+filters: [["file_names", "ilike", "%dir1/dir2/dir3/sample1234.fastq%"]]
+</pre>
+
+As a workaround, you can search for both the directory path and file name separately, and then filter on the client side.
+
+<pre>
+filters: [["file_names", "ilike", "%dir1/dir2/dir3%"], ["file_names", "ilike", "%sample1234.fastq%"]]
+</pre>
+
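+A minimal Go sketch of this workaround, combining both filters server-side (the path fragments are the hypothetical examples above):
+
+<pre>
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+func main() {
+	client := arvados.NewClientFromEnv()
+	var list arvados.CollectionList
+	// Both fragments must match somewhere in file_names. A match does
+	// not prove the file lives in that directory, so verify the full
+	// path client-side (e.g. by walking the collection) before use.
+	err := client.RequestAndDecode(&list, "GET", "/arvados/v1/collections", nil,
+		map[string]interface{}{
+			"filters": [][]interface{}{
+				{"file_names", "ilike", "%dir1/dir2/dir3%"},
+				{"file_names", "ilike", "%sample1234.fastq%"},
+			},
+		})
+	if err != nil {
+		log.Fatal(err)
+	}
+	for _, coll := range list.Items {
+		fmt.Println(coll.UUID, coll.Name)
+	}
+}
+</pre>
+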
 h3. update
 
 Update attributes of an existing Collection.
index ee71d7a3f61b6b96577b34ff9d060509f0ee0b93..0ed7a599fc79df23528e78fd0691d9a14c2bf0d7 100644 (file)
@@ -74,7 +74,7 @@ Add or update the following portions of your cluster configuration file, @config
 </code></pre>
 </notextile>
 
-h4. NVIDIA GPU support
+h4(#GPUsupport). NVIDIA GPU support
 
 To specify instance types with NVIDIA GPUs, you must include an additional @CUDA@ section:
 
index 50d5d89871a612b368c0c46e966ed5717faa7a6a..5dda77ab5ee65cdf3700be3404f53455c0c25f28 100644 (file)
@@ -212,9 +212,10 @@ This is a frontend to @arv-get@.
 <notextile>
 <pre>
 $ <code class="userinput">arv keep get --help</code>
-usage: arv-get [-h] [--retries RETRIES]
+usage: arv-get [-h] [--retries RETRIES] [--version]
                [--progress | --no-progress | --batch-progress]
-               [--hash HASH | --md5sum] [-n] [-r] [-f | --skip-existing]
+               [--hash HASH | --md5sum] [-n] [-r]
+               [-f | -v | --skip-existing | --strip-manifest] [--threads N]
                locator [destination]
 
 Copy data from Keep to a local file or pipe.
@@ -223,19 +224,20 @@ positional arguments:
   locator            Collection locator, optionally with a file path or
                      prefix.
   destination        Local file or directory where the data is to be written.
-                     Default: /dev/stdout.
+                     Default: stdout.
 
 optional arguments:
   -h, --help         show this help message and exit
   --retries RETRIES  Maximum number of times to retry server requests that
-                     encounter temporary failures (e.g., server down). Default
-                     3.
+                     encounter temporary failures (e.g., server down).
+                     Default 3.
+  --version          Print version and exit.
   --progress         Display human-readable progress on stderr (bytes and, if
                      possible, percentage of total data size). This is the
                      default behavior when it is not expected to interfere
                      with the output: specifically, stderr is a tty _and_
-                     either stdout is not a tty, or output is being written to
-                     named files rather than stdout.
+                     either stdout is not a tty, or output is being written
+                     to named files rather than stdout.
   --no-progress      Do not display human-readable progress on stderr.
   --batch-progress   Display machine-readable progress on stderr (bytes and,
                      if known, total data size).
@@ -252,11 +254,19 @@ optional arguments:
   -f                 Overwrite existing files while writing. The default
                      behavior is to refuse to write *anything* if any of the
                      output files already exist. As a special case, -f is not
-                     needed to write to /dev/stdout.
-  --skip-existing    Skip files that already exist. The default behavior is to
-                     refuse to write *anything* if any files exist that would
-                     have to be overwritten. This option causes even devices,
-                     sockets, and fifos to be skipped.
+                     needed to write to stdout.
+  -v                 Once for verbose mode, twice for debug mode.
+  --skip-existing    Skip files that already exist. The default behavior is
+                     to refuse to write *anything* if any files exist that
+                     would have to be overwritten. This option causes even
+                     devices, sockets, and fifos to be skipped.
+  --strip-manifest   When getting a collection manifest, strip its access
+                     tokens before writing it.
+  --threads N        Set the number of download threads to be used. Take into
+                     account that using lots of threads will increase the RAM
+                     requirements. Default is to use 4 threads. On high
+                     latency installations, using a greater number will
+                     improve overall throughput.
 </pre>
 </notextile>
 
index d6148d7eee1f3d2992a9d43bcb9c2ca44b3b68b5..0e97e07da3864faa821173536836ef0b6d54a88d 100644 (file)
@@ -60,9 +60,9 @@ hints:
 
   cwltool:CUDARequirement:
     cudaVersionMin: "11.0"
-    cudaComputeCapabilityMin: "9.0"
-    deviceCountMin: 1
-    deviceCountMax: 1
+    cudaComputeCapability: "9.0"
+    cudaDeviceCountMin: 1
+    cudaDeviceCountMax: 1
 
   arv:UsePreemptible:
     usePreemptible: true
@@ -163,9 +163,9 @@ Request support for Nvidia CUDA GPU acceleration in the container.  Assumes that
 table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |cudaVersionMin|string|Required.  The CUDA SDK version corresponding to the minimum driver version supported by the container (generally, the SDK version 'X.Y' the application was compiled against).|
-|cudaComputeCapabilityMin|string|Required.  The minimum CUDA hardware capability (in 'X.Y' format) required by the application's PTX or C++ GPU code (will be JIT compiled for the available hardware).|
-|deviceCountMin|integer|Minimum number of GPU devices to allocate on a single node. Required.|
-|deviceCountMax|integer|Maximum number of GPU devices to allocate on a single node. Optional.  If not specified, same as @minDeviceCount@.|
+|cudaComputeCapability|string|Required.  The minimum CUDA hardware capability (in 'X.Y' format) required by the application's PTX or C++ GPU code (will be JIT compiled for the available hardware).|
+|cudaDeviceCountMin|integer|Minimum number of GPU devices to allocate on a single node. Required.|
+|cudaDeviceCountMax|integer|Maximum number of GPU devices to allocate on a single node. Optional.  If not specified, same as @cudaDeviceCountMin@.|
 
 h2(#UsePreemptible). arv:UsePreemptible
 
index 323f672348b84f7b45688ee25b05ad81f511517d..7daceccb93d04e0498f0eabaecdaabeb7a045de3 100644 (file)
@@ -246,7 +246,7 @@ func (super *Supervisor) run(cfg *arvados.Config) error {
                runServiceCommand{name: "controller", svc: super.cluster.Services.Controller, depends: []supervisedTask{seedDatabase{}}},
                runGoProgram{src: "services/arv-git-httpd", svc: super.cluster.Services.GitHTTP},
                runGoProgram{src: "services/health", svc: super.cluster.Services.Health},
-               runGoProgram{src: "services/keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}},
+               runServiceCommand{name: "keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}},
                runServiceCommand{name: "keepstore", svc: super.cluster.Services.Keepstore},
                runGoProgram{src: "services/keep-web", svc: super.cluster.Services.WebDAV},
                runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{seedDatabase{}}},
index 8bbc33ba08493b2147148e1fcdb8538a4a991c52..6512389815dd4e3d1d786b09cf1cd5b3eedd929d 100644 (file)
@@ -917,7 +917,16 @@ Clusters:
       #
       # This flag is ignored if no preemptible instance types are
       # configured, and has no effect on top-level containers.
-      AlwaysUsePreemptibleInstances: true
+      AlwaysUsePreemptibleInstances: false
+
+      # Automatically add a preemptible variant for every
+      # non-preemptible entry in InstanceTypes below. The maximum bid
+      # price for the preemptible variant will be the non-preemptible
+      # price multiplied by PreemptiblePriceFactor. If 0, preemptible
+      # variants are not added automatically.
+      #
+      # A price factor of 1.0 is a reasonable starting point.
+      PreemptiblePriceFactor: 0
 
       # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
       # cloud dispatcher for executing containers on worker VMs.
index a20c5b2c32866184ab48ee63c2919e962eaa6fef..8e23f0732856e95a5e711d1b9356dea1554de170 100644 (file)
@@ -133,6 +133,7 @@ var whitelist = map[string]bool{
        "Containers.MaxDispatchAttempts":           false,
        "Containers.MaxRetryAttempts":              true,
        "Containers.MinRetryPeriod":                true,
+       "Containers.PreemptiblePriceFactor":        false,
        "Containers.ReserveExtraRAM":               true,
        "Containers.RuntimeEngine":                 true,
        "Containers.ShellAccess":                   true,
index 8d498af170f2180881fac496c82900b1bd764d7f..5afb51c5adcfd6dce89d069670b9ce3aadde2a2e 100644 (file)
@@ -285,6 +285,19 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                }
        }
 
+       // Preprocess/automate some configs
+       for id, cc := range cfg.Clusters {
+               ldr.autofillPreemptible("Clusters."+id, &cc)
+
+               if strings.Count(cc.Users.AnonymousUserToken, "/") == 3 {
+                       // V2 token, strip it to just a secret
+                       tmp := strings.Split(cc.Users.AnonymousUserToken, "/")
+                       cc.Users.AnonymousUserToken = tmp[2]
+               }
+
+               cfg.Clusters[id] = cc
+       }
+
        // Check for known mistakes
        for id, cc := range cfg.Clusters {
                for remote := range cc.RemoteClusters {
@@ -316,11 +329,6 @@ func (ldr *Loader) Load() (*arvados.Config, error) {
                                return nil, err
                        }
                }
-               if strings.Count(cc.Users.AnonymousUserToken, "/") == 3 {
-                       // V2 token, strip it to just a secret
-                       tmp := strings.Split(cc.Users.AnonymousUserToken, "/")
-                       cc.Users.AnonymousUserToken = tmp[2]
-               }
        }
        return &cfg, nil
 }
@@ -361,10 +369,12 @@ func (ldr *Loader) checkToken(label, token string, mandatory bool, acceptV2 bool
                if !strings.HasPrefix(token, "v2/") {
                        return fmt.Errorf("%s: unacceptable characters in token (only a-z, A-Z, 0-9 are acceptable)", label)
                }
-               ldr.Logger.Warnf("%s: token is a full V2 token, should just be a secret (remove everything up to and including the last forward slash)", label)
                if !acceptableTokenRe.MatchString(tmp[2]) {
                        return fmt.Errorf("%s: unacceptable characters in V2 token secret (only a-z, A-Z, 0-9 are acceptable)", label)
                }
+               if len(tmp[2]) < acceptableTokenLength {
+                       ldr.Logger.Warnf("%s: secret is too short (should be at least %d characters)", label, acceptableTokenLength)
+               }
        } else if len(token) < acceptableTokenLength {
                if ldr.Logger != nil {
                        ldr.Logger.Warnf("%s: token is too short (should be at least %d characters)", label, acceptableTokenLength)
@@ -527,3 +537,21 @@ func (ldr *Loader) logExtraKeys(expected, supplied map[string]interface{}, prefi
                }
        }
 }
+
+func (ldr *Loader) autofillPreemptible(label string, cc *arvados.Cluster) {
+       if factor := cc.Containers.PreemptiblePriceFactor; factor > 0 {
+               for name, it := range cc.InstanceTypes {
+                       if !it.Preemptible {
+                               it.Preemptible = true
+                               it.Price = it.Price * factor
+                               it.Name = name + ".preemptible"
+                               if it2, exists := cc.InstanceTypes[it.Name]; exists && it2 != it {
+                                       ldr.Logger.Warnf("%s.InstanceTypes[%s]: already exists, so not automatically adding a preemptible variant of %s", label, it.Name, name)
+                                       continue
+                               }
+                               cc.InstanceTypes[it.Name] = it
+                       }
+               }
+       }
+
+}
index 1ede805b0085a668b40169af458243a304c12e93..5270dcccce8b9d95b5b5fdb1e6fe9ad15f2e0426 100644 (file)
@@ -305,8 +305,6 @@ func (s *LoadSuite) TestNoUnrecognizedKeysInDefaultConfig(c *check.C) {
 
 func (s *LoadSuite) TestNoWarningsForDumpedConfig(c *check.C) {
        var logbuf bytes.Buffer
-       logger := logrus.New()
-       logger.Out = &logbuf
        cfg, err := testLoader(c, `
 Clusters:
  zzzzz:
@@ -695,3 +693,72 @@ Clusters:
        _, err = ldr.Load()
        c.Assert(err, check.ErrorMatches, `there is no default storage class.*`)
 }
+
+func (s *LoadSuite) TestPreemptiblePriceFactor(c *check.C) {
+       yaml := `
+Clusters:
+ z1111:
+  InstanceTypes:
+   Type1:
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+ z2222:
+  Containers:
+   PreemptiblePriceFactor: 0.5
+  InstanceTypes:
+   Type1:
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+ z3333:
+  Containers:
+   PreemptiblePriceFactor: 0.5
+  InstanceTypes:
+   Type1:
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+   Type1.preemptible: # higher price than the auto-added variant would use -- should generate warning
+    ProviderType: Type1
+    RAM: 12345M
+    VCPUs: 8
+    Price: 1.23
+    Preemptible: true
+   Type2:
+    RAM: 23456M
+    VCPUs: 16
+    Price: 2.46
+   Type2.preemptible: # identical to the auto-added variant -- so no warning
+    ProviderType: Type2
+    RAM: 23456M
+    VCPUs: 16
+    Price: 1.23
+    Preemptible: true
+`
+       var logbuf bytes.Buffer
+       cfg, err := testLoader(c, yaml, &logbuf).Load()
+       c.Assert(err, check.IsNil)
+       cc, err := cfg.GetCluster("z1111")
+       c.Assert(err, check.IsNil)
+       c.Check(cc.InstanceTypes["Type1"].Price, check.Equals, 1.23)
+       c.Check(cc.InstanceTypes, check.HasLen, 1)
+
+       cc, err = cfg.GetCluster("z2222")
+       c.Assert(err, check.IsNil)
+       c.Check(cc.InstanceTypes["Type1"].Preemptible, check.Equals, false)
+       c.Check(cc.InstanceTypes["Type1"].Price, check.Equals, 1.23)
+       c.Check(cc.InstanceTypes["Type1.preemptible"].Preemptible, check.Equals, true)
+       c.Check(cc.InstanceTypes["Type1.preemptible"].Price, check.Equals, 1.23/2)
+       c.Check(cc.InstanceTypes["Type1.preemptible"].ProviderType, check.Equals, "Type1")
+       c.Check(cc.InstanceTypes, check.HasLen, 2)
+
+       cc, err = cfg.GetCluster("z3333")
+       c.Assert(err, check.IsNil)
+       // Don't overwrite the explicitly configured preemptible variant
+       c.Check(cc.InstanceTypes["Type1.preemptible"].Price, check.Equals, 1.23)
+       c.Check(cc.InstanceTypes, check.HasLen, 4)
+       c.Check(logbuf.String(), check.Matches, `(?ms).*Clusters\.z3333\.InstanceTypes\[Type1\.preemptible\]: already exists, so not automatically adding a preemptible variant of Type1.*`)
+       c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*Type2\.preemptible.*`)
+       c.Check(logbuf.String(), check.Not(check.Matches), `(?ms).*(z1111|z2222)[^\n]*InstanceTypes.*`)
+}
index d3819f6262df8f7df4134753d0359e1d04e12950..1b8ec9e64a6e01138a1bfc58a599a78eb2f0e44b 100644 (file)
@@ -69,14 +69,17 @@ func saltedTokenProvider(cluster *arvados.Cluster, local backend, remoteID strin
                        return nil, errors.New("no token provided")
                }
                for _, token := range incoming.Tokens {
-                       if strings.HasPrefix(token, "v2/"+cluster.ClusterID+"-") && remoteID == cluster.Login.LoginCluster {
-                               // If we did this, the login cluster
-                               // would call back to us and then
-                               // reject our response because the
-                               // user UUID prefix (i.e., the
-                               // LoginCluster prefix) won't match
-                               // the token UUID prefix (i.e., our
-                               // prefix).
+                       if strings.HasPrefix(token, "v2/"+cluster.ClusterID+"-") &&
+                               !strings.HasPrefix(token, "v2/"+cluster.ClusterID+"-gj3su-anonymouspublic/") &&
+                               remoteID == cluster.Login.LoginCluster {
+                               // If we did this, the login cluster would call back to us and then
+                               // reject our response because the user UUID prefix (i.e., the
+                               // LoginCluster prefix) won't match the token UUID prefix (i.e., our
+                               // prefix). The anonymous token is OK to forward, because (unlike other
+                               // local tokens for real users) the validation callback will return the
+                               // locally issued anonymous user ID instead of a login-cluster user ID.
+                               // That anonymous user ID gets mapped to the local anonymous user
+                               // automatically on the login cluster.
                                return nil, httpErrorf(http.StatusUnauthorized, "cannot use a locally issued token to forward a request to our login cluster (%s)", remoteID)
                        }
                        salted, err := auth.SaltToken(token, remoteID)
index 817cff79609dab91b7743f2869950974c8796082..5e467cb0588607d3deaa06c1d92326ed18f8f09c 100644 (file)
@@ -5,9 +5,11 @@
 package controller
 
 import (
+       "bytes"
        "context"
        "crypto/tls"
        "encoding/json"
+       "io"
        "io/ioutil"
        "net/http"
        "net/http/httptest"
@@ -36,13 +38,15 @@ var _ = check.Suite(&HandlerSuite{})
 type HandlerSuite struct {
        cluster *arvados.Cluster
        handler *Handler
+       logbuf  *bytes.Buffer
        ctx     context.Context
        cancel  context.CancelFunc
 }
 
 func (s *HandlerSuite) SetUpTest(c *check.C) {
+       s.logbuf = &bytes.Buffer{}
        s.ctx, s.cancel = context.WithCancel(context.Background())
-       s.ctx = ctxlog.Context(s.ctx, ctxlog.New(os.Stderr, "json", "debug"))
+       s.ctx = ctxlog.Context(s.ctx, ctxlog.New(io.MultiWriter(os.Stderr, s.logbuf), "json", "debug"))
        s.cluster = &arvados.Cluster{
                ClusterID:  "zzzzz",
                PostgreSQL: integrationTestCluster().PostgreSQL,
@@ -317,6 +321,16 @@ func (s *HandlerSuite) TestValidateRemoteToken(c *check.C) {
        }
 }
 
+func (s *HandlerSuite) TestLogTokenUUID(c *check.C) {
+       req := httptest.NewRequest("GET", "https://0.0.0.0/arvados/v1/users/current", nil)
+       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+       req = req.WithContext(s.ctx)
+       resp := httptest.NewRecorder()
+       httpserver.LogRequests(s.handler).ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(s.logbuf.String(), check.Matches, `(?ms).*"tokenUUIDs":\["`+strings.Split(arvadostest.ActiveTokenV2, "/")[1]+`"\].*`)
+}
+
 func (s *HandlerSuite) TestCreateAPIToken(c *check.C) {
        req := httptest.NewRequest("GET", "/arvados/v1/users/current", nil)
        auth, err := s.handler.createAPItoken(req, arvadostest.ActiveUserUUID, nil)
index 6ffbbd2720857b893b03d29fb2133303e542ae00..50cf89c0d485156ac2e09c6e90934014b57bb7e3 100644 (file)
@@ -380,6 +380,56 @@ func (s *IntegrationSuite) TestGetCollectionAsAnonymous(c *check.C) {
        c.Check(coll.PortableDataHash, check.Equals, pdh)
 }
 
+// z3333 should forward the locally-issued anonymous user token to its login
+// cluster z1111. That is no problem because the login cluster controller will
+// map any anonymous user token to its local anonymous user.
+//
+// This needs to work because wb1 has a tendency to slap the local anonymous
+// user token on every request as a reader_token, which gets folded into the
+// request token list by the controller.
+//
+// Use a z1111 user token and the anonymous token from z3333 passed in as a
+// reader_token to do a request on z3333, asking for the z1111 anonymous user
+// object. The request will be forwarded to the z1111 cluster. The presence of
+// the z3333 anonymous user token should not prohibit the request from being
+// forwarded.
+func (s *IntegrationSuite) TestForwardAnonymousTokenToLoginCluster(c *check.C) {
+       conn1 := s.testClusters["z1111"].Conn()
+       s.testClusters["z3333"].Conn()
+
+       rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+       _, anonac3, _ := s.testClusters["z3333"].AnonymousClients()
+
+       // Make a user connection to z3333 (using a z1111 user, because that's the login cluster)
+       _, userac1, _, _ := s.testClusters["z3333"].UserClients(rootctx1, c, conn1, "user@example.com", true)
+
+       // Get the anonymous user token for z3333
+       var anon3Auth arvados.APIClientAuthorization
+       err := anonac3.RequestAndDecode(&anon3Auth, "GET", "/arvados/v1/api_client_authorizations/current", nil, nil)
+       c.Check(err, check.IsNil)
+
+       var userList arvados.UserList
+       where := make(map[string]string)
+       where["uuid"] = "z1111-tpzed-anonymouspublic"
+       err = userac1.RequestAndDecode(&userList, "GET", "/arvados/v1/users", nil,
+               map[string]interface{}{
+                       "reader_tokens": []string{anon3Auth.TokenV2()},
+                       "where":         where,
+               },
+       )
+       // The local z3333 anonymous token must be allowed to be forwarded to the login cluster
+       c.Check(err, check.IsNil)
+
+       userac1.AuthToken = "v2/z1111-gj3su-asdfasdfasdfasd/this-token-does-not-validate-so-anonymous-token-will-be-used-instead"
+       err = userac1.RequestAndDecode(&userList, "GET", "/arvados/v1/users", nil,
+               map[string]interface{}{
+                       "reader_tokens": []string{anon3Auth.TokenV2()},
+                       "where":         where,
+               },
+       )
+       c.Check(err, check.IsNil)
+}
+
 // Get a token from the login cluster (z1111), use it to submit a
 // container request on z2222.
 func (s *IntegrationSuite) TestCreateContainerRequestWithFedToken(c *check.C) {
index 2cfcc4fc28287c8ee44166277ecaffe23d42e293..05bdb4754f0a860ac552867d42bf6e30af9eb4d6 100644 (file)
@@ -588,6 +588,23 @@ func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() int
                        "apiOptsType": fmt.Sprintf("%T", opts),
                        "apiOpts":     opts,
                }).Debug("exec")
+               // Extract the token UUIDs (or a placeholder for v1 tokens)
+               var tokenUUIDs []string
+               for _, t := range creds.Tokens {
+                       if strings.HasPrefix(t, "v2/") {
+                               tokenParts := strings.Split(t, "/")
+                               if len(tokenParts) >= 3 {
+                                       tokenUUIDs = append(tokenUUIDs, tokenParts[1])
+                               }
+                       } else {
+                               end := t
+                               if len(t) > 5 {
+                                       end = t[len(t)-5:]
+                               }
+                               tokenUUIDs = append(tokenUUIDs, "v1 token ending in "+end)
+                       }
+               }
+               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{"tokenUUIDs": tokenUUIDs})
                resp, err := exec(ctx, opts)
                if err != nil {
                        logger.WithError(err).Debugf("returning error type %T", err)
index dbafc89fe4792d90b30e5fa70ec18b43a0c5107d..1ed2ac1314c3618014269450cbc8e78921988657 100644 (file)
@@ -150,6 +150,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                "URL":     listenURL,
                "Listen":  srv.Addr,
                "Service": c.svcName,
+               "Version": cmd.Version.String(),
        }).Info("listening")
        if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
                logger.WithError(err).Errorf("error notifying init daemon")
index 75ac892b4ba6680a7af4c91e47f1e770fcd7020c..9c682412a04c6ae9d21448f2aa19742b791100e2 100644 (file)
@@ -1,7 +1,7 @@
 Package: ArvadosR
 Type: Package
 Title: Arvados R SDK
-Version: 0.0.6
+Version: 2.4.0
 Authors@R: c(person("Fuad", "Muhic", role = c("aut", "ctr"), email = "fmuhic@capeannenterprises.com"),
              person("Peter", "Amstutz", role = c("cre"), email = "peter.amstutz@curii.com"))
 Description: This is the Arvados R SDK
index 0021bc8d906c5531b70c79a87d9be169658b5c57..d38414fc811d433acb5751613c9974f1bbc0b0c5 100755 (executable)
@@ -118,14 +118,15 @@ elif [[ "$suite" =~ conformance-(.*) ]] ; then
      git clone https://github.com/common-workflow-language/cwl-\${version}.git
    fi
    cd cwl-\${version}
+   git checkout \${version}.0
 elif [[ "$suite" != "integration" ]] ; then
    echo "ERROR: unknown suite '$suite'"
    exit 1
 fi
 
-if [[ "$suite" != "integration" ]] ; then
-  git pull
-fi
+#if [[ "$suite" != "integration" ]] ; then
+#  git pull
+#fi
 
 export ARVADOS_API_HOST=localhost:8000
 export ARVADOS_API_HOST_INSECURE=1
@@ -154,18 +155,6 @@ else
   arv-keepdocker arvados/jobs latest
 fi
 
-cat >/tmp/cwltest/arv-cwl-jobs <<EOF2
-#!/bin/sh
-exec arvados-cwl-runner --api=jobs \\\$@
-EOF2
-chmod +x /tmp/cwltest/arv-cwl-jobs
-
-cat >/tmp/cwltest/arv-cwl-containers <<EOF2
-#!/bin/sh
-exec arvados-cwl-runner --api=containers \\\$@
-EOF2
-chmod +x /tmp/cwltest/arv-cwl-containers
-
 EXTRA=--compute-checksum
 
 if [[ $devcwl -eq 1 ]] ; then
@@ -176,8 +165,10 @@ env
 if [[ "$suite" = "integration" ]] ; then
    cd /usr/src/arvados/sdk/cwl/tests
    exec ./arvados-tests.sh $@
+elif [[ "$suite" = "conformance-v1.2" ]] ; then
+   exec cwltest --tool arvados-cwl-runner --test conformance_tests.yaml -N307 $@ -- \$EXTRA
 else
-   exec ./run_test.sh RUNNER=/tmp/cwltest/arv-cwl-${runapi} EXTRA="\$EXTRA" $@
+   exec ./run_test.sh RUNNER=arvados-cwl-runner EXTRA="\$EXTRA" $@
 fi
 EOF
 
index 5ec828667fc940ace2c3f59b6cdc643139ae3b14..24d5ac3e335c824f5ea4a444c6066ce37f3cc86f 100644 (file)
@@ -12,6 +12,7 @@ import (
        "errors"
        "fmt"
        "io"
+       "io/fs"
        "io/ioutil"
        "log"
        "net/http"
@@ -102,11 +103,60 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
 }
 
 // NewClientFromEnv creates a new Client that uses the default HTTP
-// client with the API endpoint and credentials given by the
-// ARVADOS_API_* environment variables.
+// client, and loads API endpoint and credentials from ARVADOS_*
+// environment variables (if set) and
+// $HOME/.config/arvados/settings.conf (if readable).
+//
+// If a config exists in both locations, the environment variable is
+// used.
+//
+// If there is an error (other than ENOENT) reading settings.conf,
+// NewClientFromEnv logs the error to log.Default(), then proceeds as
+// if settings.conf did not exist.
+//
+// Space characters are trimmed when reading the settings file, so
+// these are equivalent:
+//
+//   ARVADOS_API_HOST=localhost\n
+//   ARVADOS_API_HOST=localhost\r\n
+//   ARVADOS_API_HOST = localhost \n
+//   \tARVADOS_API_HOST = localhost\n
 func NewClientFromEnv() *Client {
+       vars := map[string]string{}
+       home := os.Getenv("HOME")
+       conffile := home + "/.config/arvados/settings.conf"
+       if home == "" {
+               // no $HOME => just use env vars
+       } else if settings, err := os.ReadFile(conffile); errors.Is(err, fs.ErrNotExist) {
+               // no config file => just use env vars
+       } else if err != nil {
+               // config file unreadable => log message, then use env vars
+               log.Printf("continuing without loading %s: %s", conffile, err)
+       } else {
+               for _, line := range bytes.Split(settings, []byte{'\n'}) {
+                       kv := bytes.SplitN(line, []byte{'='}, 2)
+                       k := string(bytes.TrimSpace(kv[0]))
+                       if len(kv) != 2 || !strings.HasPrefix(k, "ARVADOS_") {
+                               // Same behavior as python sdk:
+                               // silently skip leading # (comments),
+                               // blank lines, typos, and non-Arvados
+                               // vars.
+                               continue
+                       }
+                       vars[k] = string(bytes.TrimSpace(kv[1]))
+               }
+       }
+       for _, env := range os.Environ() {
+               if !strings.HasPrefix(env, "ARVADOS_") {
+                       continue
+               }
+               kv := strings.SplitN(env, "=", 2)
+               if len(kv) == 2 {
+                       vars[kv[0]] = kv[1]
+               }
+       }
        var svcs []string
-       for _, s := range strings.Split(os.Getenv("ARVADOS_KEEP_SERVICES"), " ") {
+       for _, s := range strings.Split(vars["ARVADOS_KEEP_SERVICES"], " ") {
                if s == "" {
                        continue
                } else if u, err := url.Parse(s); err != nil {
@@ -118,13 +168,13 @@ func NewClientFromEnv() *Client {
                }
        }
        var insecure bool
-       if s := strings.ToLower(os.Getenv("ARVADOS_API_HOST_INSECURE")); s == "1" || s == "yes" || s == "true" {
+       if s := strings.ToLower(vars["ARVADOS_API_HOST_INSECURE"]); s == "1" || s == "yes" || s == "true" {
                insecure = true
        }
        return &Client{
                Scheme:          "https",
-               APIHost:         os.Getenv("ARVADOS_API_HOST"),
-               AuthToken:       os.Getenv("ARVADOS_API_TOKEN"),
+               APIHost:         vars["ARVADOS_API_HOST"],
+               AuthToken:       vars["ARVADOS_API_TOKEN"],
                Insecure:        insecure,
                KeepServiceURIs: svcs,
                Timeout:         5 * time.Minute,
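
A short usage sketch of the precedence rules described in the new doc comment above (the settings file content is illustrative):

<pre>
package main

import (
	"fmt"

	"git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
	// With ~/.config/arvados/settings.conf containing, e.g.:
	//   ARVADOS_API_HOST = zzzzz.example.com
	//   ARVADOS_API_TOKEN = (a secret)
	// the client is configured from the file; any ARVADOS_* variable
	// that is also set in the environment takes precedence.
	client := arvados.NewClientFromEnv()
	fmt.Println(client.APIHost, client.Insecure)
}
</pre>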
index df938008d49756b850ca6e5ce5abee8a0510e2a3..2363803cab1de157f4074d3a2770f2cc0c9201ca 100644 (file)
@@ -10,9 +10,12 @@ import (
        "io/ioutil"
        "net/http"
        "net/url"
+       "os"
+       "strings"
        "sync"
-       "testing"
        "testing/iotest"
+
+       check "gopkg.in/check.v1"
 )
 
 type stubTransport struct {
@@ -68,43 +71,36 @@ func (stub *timeoutTransport) RoundTrip(req *http.Request) (*http.Response, erro
        }, nil
 }
 
-func TestCurrentUser(t *testing.T) {
-       t.Parallel()
+var _ = check.Suite(&clientSuite{})
+
+type clientSuite struct{}
+
+func (*clientSuite) TestCurrentUser(c *check.C) {
        stub := &stubTransport{
                Responses: map[string]string{
                        "/arvados/v1/users/current": `{"uuid":"zzzzz-abcde-012340123401234"}`,
                },
        }
-       c := &Client{
+       client := &Client{
                Client: &http.Client{
                        Transport: stub,
                },
                APIHost:   "zzzzz.arvadosapi.com",
                AuthToken: "xyzzy",
        }
-       u, err := c.CurrentUser()
-       if err != nil {
-               t.Fatal(err)
-       }
-       if x := "zzzzz-abcde-012340123401234"; u.UUID != x {
-               t.Errorf("got uuid %q, expected %q", u.UUID, x)
-       }
-       if len(stub.Requests) < 1 {
-               t.Fatal("empty stub.Requests")
-       }
+       u, err := client.CurrentUser()
+       c.Check(err, check.IsNil)
+       c.Check(u.UUID, check.Equals, "zzzzz-abcde-012340123401234")
+       c.Check(stub.Requests, check.Not(check.HasLen), 0)
        hdr := stub.Requests[len(stub.Requests)-1].Header
-       if hdr.Get("Authorization") != "OAuth2 xyzzy" {
-               t.Errorf("got headers %+q, expected Authorization header", hdr)
-       }
+       c.Check(hdr.Get("Authorization"), check.Equals, "OAuth2 xyzzy")
 
-       c.Client.Transport = &errorTransport{}
-       u, err = c.CurrentUser()
-       if err == nil {
-               t.Errorf("got nil error, expected something awful")
-       }
+       client.Client.Transport = &errorTransport{}
+       u, err = client.CurrentUser()
+       c.Check(err, check.NotNil)
 }
 
-func TestAnythingToValues(t *testing.T) {
+func (*clientSuite) TestAnythingToValues(c *check.C) {
        type testCase struct {
                in interface{}
                // ok==nil means anythingToValues should return an
@@ -158,17 +154,66 @@ func TestAnythingToValues(t *testing.T) {
                        ok: nil,
                },
        } {
-               t.Logf("%#v", tc.in)
+               c.Logf("%#v", tc.in)
                out, err := anythingToValues(tc.in)
-               switch {
-               case tc.ok == nil:
-                       if err == nil {
-                               t.Errorf("got %#v, expected error", out)
-                       }
-               case err != nil:
-                       t.Errorf("got err %#v, expected nil", err)
-               case !tc.ok(out):
-                       t.Errorf("got %#v but tc.ok() says that is wrong", out)
+               if tc.ok == nil {
+                       c.Check(err, check.NotNil)
+                       continue
                }
+               c.Check(err, check.IsNil)
+               c.Check(tc.ok(out), check.Equals, true)
        }
 }
+
+func (*clientSuite) TestLoadConfig(c *check.C) {
+       oldenv := os.Environ()
+       defer func() {
+               os.Clearenv()
+               for _, s := range oldenv {
+                       i := strings.IndexRune(s, '=')
+                       os.Setenv(s[:i], s[i+1:])
+               }
+       }()
+
+       tmp := c.MkDir()
+       os.Setenv("HOME", tmp)
+       for _, s := range os.Environ() {
+               if strings.HasPrefix(s, "ARVADOS_") {
+                       i := strings.IndexRune(s, '=')
+                       os.Unsetenv(s[:i])
+               }
+       }
+       os.Mkdir(tmp+"/.config", 0777)
+       os.Mkdir(tmp+"/.config/arvados", 0777)
+
+       // Use $HOME/.config/arvados/settings.conf if no env vars are
+       // set
+       os.WriteFile(tmp+"/.config/arvados/settings.conf", []byte(`
+               ARVADOS_API_HOST = localhost:1
+               ARVADOS_API_TOKEN = token_from_settings_file1
+       `), 0777)
+       client := NewClientFromEnv()
+       c.Check(client.AuthToken, check.Equals, "token_from_settings_file1")
+       c.Check(client.APIHost, check.Equals, "localhost:1")
+       c.Check(client.Insecure, check.Equals, false)
+
+       // ..._INSECURE=true, comments, ignored lines in settings.conf
+       os.WriteFile(tmp+"/.config/arvados/settings.conf", []byte(`
+               (ignored) = (ignored)
+               #ARVADOS_API_HOST = localhost:2
+               ARVADOS_API_TOKEN = token_from_settings_file2
+               ARVADOS_API_HOST_INSECURE = true
+       `), 0777)
+       client = NewClientFromEnv()
+       c.Check(client.AuthToken, check.Equals, "token_from_settings_file2")
+       c.Check(client.APIHost, check.Equals, "")
+       c.Check(client.Insecure, check.Equals, true)
+
+       // Environment variables override settings.conf
+       os.Setenv("ARVADOS_API_HOST", "[::]:3")
+       os.Setenv("ARVADOS_API_HOST_INSECURE", "0")
+       client = NewClientFromEnv()
+       c.Check(client.AuthToken, check.Equals, "token_from_settings_file2")
+       c.Check(client.APIHost, check.Equals, "[::]:3")
+       c.Check(client.Insecure, check.Equals, false)
+}
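
The test above pins down the client's lookup order: values in ~/.config/arvados/settings.conf are used only when no ARVADOS_* environment variables are set, and environment variables always win. A minimal sketch of that precedence rule in Python (the parse_settings helper and its behavior are illustrative, not part of the SDK):

    import os

    def parse_settings(path):
        """Parse ARVADOS_* 'KEY = VALUE' lines; skip comments and junk."""
        settings = {}
        try:
            with open(path) as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('#') or '=' not in line:
                        continue
                    key, _, value = line.partition('=')
                    key, value = key.strip(), value.strip()
                    if key.startswith('ARVADOS_'):
                        settings[key] = value
        except FileNotFoundError:
            pass
        return settings

    def load_config():
        conf = parse_settings(os.path.expanduser('~/.config/arvados/settings.conf'))
        # Environment variables override the settings file.
        conf.update((k, v) for k, v in os.environ.items() if k.startswith('ARVADOS_'))
        return conf
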
index e0750bd8c54ae0a671f282ba9438d23e0bfb48ad..6c9324e478a273b68e83dcd7fb4e46150b8e0da1 100644 (file)
@@ -448,6 +448,7 @@ type ContainersConfig struct {
        StaleLockTimeout              Duration
        SupportedDockerImageFormats   StringSet
        AlwaysUsePreemptibleInstances bool
+       PreemptiblePriceFactor        float64
        RuntimeEngine                 string
        LocalKeepBlobBuffersPerVCPU   int
        LocalKeepLogsToContainerLog   string
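
PreemptiblePriceFactor presumably lets the dispatcher synthesize a preemptible variant of each configured instance type at a scaled price. A sketch of that derivation under that assumption (the field names and the ".preemptible" suffix are illustrative):

    def add_preemptible_variants(instance_types, factor):
        """Sketch: derive preemptible variants priced at factor * on-demand."""
        if factor <= 0:
            return instance_types
        out = dict(instance_types)
        for name, it in instance_types.items():
            out[name + '.preemptible'] = dict(it, Preemptible=True,
                                              Price=it['Price'] * factor)
        return out

    types = {'m5.large': {'Price': 0.096, 'Preemptible': False}}
    print(add_preemptible_variants(types, 0.3))  # spot variant at 0.0288
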
index 7eb7f0f03d57b571e314f8d87ca6714cf7d6563f..5a46635e9102365bbfd01c9c9c120bd8e23a7026 100644 (file)
@@ -9,6 +9,7 @@ import (
        "context"
        "net"
        "net/http"
+       "sync"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
@@ -21,7 +22,9 @@ type contextKey struct {
 }
 
 var (
-       requestTimeContextKey = contextKey{"requestTime"}
+       requestTimeContextKey       = contextKey{"requestTime"}
+       responseLogFieldsContextKey = contextKey{"responseLogFields"}
+       mutexContextKey             = contextKey{"mutex"}
 )
 
 type hijacker interface {
@@ -64,6 +67,19 @@ func HandlerWithDeadline(timeout time.Duration, next http.Handler) http.Handler
        })
 }
 
+func SetResponseLogFields(ctx context.Context, fields logrus.Fields) {
+       m, _ := ctx.Value(&mutexContextKey).(*sync.Mutex)
+       c, _ := ctx.Value(&responseLogFieldsContextKey).(logrus.Fields)
+       if m == nil || c == nil {
+               return
+       }
+       m.Lock()
+       defer m.Unlock()
+       for k, v := range fields {
+               c[k] = v
+       }
+}
+
 // LogRequests wraps an http.Handler, logging each request and
 // response.
 func LogRequests(h http.Handler) http.Handler {
@@ -81,6 +97,8 @@ func LogRequests(h http.Handler) http.Handler {
                })
                ctx := req.Context()
                ctx = context.WithValue(ctx, &requestTimeContextKey, time.Now())
+               ctx = context.WithValue(ctx, &responseLogFieldsContextKey, logrus.Fields{})
+               ctx = context.WithValue(ctx, &mutexContextKey, &sync.Mutex{})
                ctx = ctxlog.Context(ctx, lgr)
                req = req.WithContext(ctx)
 
@@ -124,6 +142,9 @@ func logResponse(w *responseTimer, req *http.Request, lgr *logrus.Entry) {
                        "timeWriteBody": stats.Duration(tDone.Sub(writeTime)),
                })
        }
+       if responseLogFields, ok := req.Context().Value(&responseLogFieldsContextKey).(logrus.Fields); ok {
+               lgr = lgr.WithFields(responseLogFields)
+       }
        respCode := w.WroteStatus()
        if respCode == 0 {
                respCode = http.StatusOK
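
The idea in this logger change: the request context carries a shared logrus.Fields map and a mutex, so any handler in the chain can attach extra fields that logResponse later folds into the final response log line, and SetResponseLogFields is a silent no-op when the LogRequests middleware has not installed the map. A rough Python rendering of the same stash-in-context pattern (names are illustrative, not an Arvados API):

    import threading

    class RequestContext:
        """Per-request context carrying a shared field map and its lock."""
        def __init__(self):
            self.response_log_fields = {}
            self.mutex = threading.Lock()

    def set_response_log_fields(ctx, fields):
        # Silent no-op if middleware didn't install the map, as in the Go code.
        if ctx is None or ctx.response_log_fields is None:
            return
        with ctx.mutex:
            ctx.response_log_fields.update(fields)

    ctx = RequestContext()
    set_response_log_fields(ctx, {'collection_file_path': 'foo/bar.txt'})
    print(ctx.response_log_fields)
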
index 0fcdc1e6334957f27a5ff1f10fbdedcf2716609a..2ce0e46b30bd67ad948f832183ab091865c2ea53 100644 (file)
@@ -481,7 +481,7 @@ class _BlockManager(object):
     DEFAULT_PUT_THREADS = 2
     DEFAULT_GET_THREADS = 2
 
-    def __init__(self, keep, copies=None, put_threads=None, num_retries=None, storage_classes_func=None):
+    def __init__(self, keep, copies=None, put_threads=None, num_retries=None, storage_classes_func=None, get_threads=None):
         """keep: KeepClient object to use"""
         self._keep = keep
         self._bufferblocks = collections.OrderedDict()
@@ -492,7 +492,7 @@ class _BlockManager(object):
         self.lock = threading.Lock()
         self.prefetch_enabled = True
         self.num_put_threads = put_threads or _BlockManager.DEFAULT_PUT_THREADS
-        self.num_get_threads = _BlockManager.DEFAULT_GET_THREADS
+        self.num_get_threads = get_threads or _BlockManager.DEFAULT_GET_THREADS
         self.copies = copies
         self.storage_classes = storage_classes_func or (lambda: [])
         self._pending_write_size = 0
@@ -593,7 +593,7 @@ class _BlockManager(object):
                 b = self._prefetch_queue.get()
                 if b is None:
                     return
-                self._keep.get(b)
+                self._keep.get(b, prefetch=True)
             except Exception:
                 _logger.exception("Exception doing block prefetch")
 
@@ -841,9 +841,6 @@ class _BlockManager(object):
         if not self.prefetch_enabled:
             return
 
-        if self._keep.get_from_cache(locator) is not None:
-            return
-
         with self.lock:
             if locator in self._bufferblocks:
                 return
@@ -1099,7 +1096,7 @@ class ArvadosFile(object):
             if size == 0 or offset >= self.size():
                 return b''
             readsegs = locators_and_ranges(self._segments, offset, size)
-            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
+            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE * self.parent._my_block_manager().num_get_threads, limit=32)
 
         locs = set()
         data = []
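
The read-ahead window now scales with the GET thread pool instead of being a fixed single block: window = KEEP_BLOCK_SIZE * num_get_threads, still capped at 32 segments. A quick check of the arithmetic, assuming the usual 64 MiB Keep block size:

    KEEP_BLOCK_SIZE = 64 * 1024 * 1024  # as in arvados.config

    for get_threads in (2, 4):
        window = KEEP_BLOCK_SIZE * get_threads
        print(get_threads, 'threads ->', window // (1024 * 1024), 'MiB lookahead')
    # 2 threads -> 128 MiB lookahead
    # 4 threads -> 256 MiB lookahead
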
index a076de6baf622f560f92859db68e7e8cdafc65f9..a44d42b6ac7cd7a4156ab3b8bc4f72f86060e3a0 100644 (file)
@@ -1262,7 +1262,8 @@ class Collection(RichCollectionBase):
                  block_manager=None,
                  replication_desired=None,
                  storage_classes_desired=None,
-                 put_threads=None):
+                 put_threads=None,
+                 get_threads=None):
         """Collection constructor.
 
         :manifest_locator_or_text:
@@ -1311,6 +1312,7 @@ class Collection(RichCollectionBase):
         self.replication_desired = replication_desired
         self._storage_classes_desired = storage_classes_desired
         self.put_threads = put_threads
+        self.get_threads = get_threads
 
         if apiconfig:
             self._config = apiconfig
@@ -1424,7 +1426,12 @@ class Collection(RichCollectionBase):
             copies = (self.replication_desired or
                       self._my_api()._rootDesc.get('defaultCollectionReplication',
                                                    2))
-            self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads, num_retries=self.num_retries, storage_classes_func=self.storage_classes_desired)
+            self._block_manager = _BlockManager(self._my_keep(),
+                                                copies=copies,
+                                                put_threads=self.put_threads,
+                                                num_retries=self.num_retries,
+                                                storage_classes_func=self.storage_classes_desired,
+                                                get_threads=self.get_threads)
         return self._block_manager
 
     def _remember_api_response(self, response):
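
With the constructor change above, callers can tune parallel reads per collection object. A hedged usage sketch (the UUID and file path are placeholders, and a configured Arvados client environment is assumed):

    import arvados

    # get_threads is forwarded to the _BlockManager, which sizes its
    # prefetch thread pool accordingly.
    c = arvados.collection.Collection('zzzzz-4zz18-zzzzzzzzzzzzzzz',
                                      get_threads=4)
    with c.open('some/file.bin', 'rb') as f:
        data = f.read(2**20)
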
index eb682976253b66a3c0c6c0ebd0a9dc5ca306d499..c061c70f0eebbac2ed2025fdecd27865c27139b8 100755 (executable)
@@ -98,6 +98,15 @@ When getting a collection manifest, strip its access tokens before writing
 it.
 """)
 
+parser.add_argument('--threads', type=int, metavar='N', default=4,
+                    help="""
+Set the number of download threads to be used. Note that using many
+threads increases RAM requirements. The default is 4 threads.
+On high-latency installations, a higher thread count may improve
+overall throughput.
+""")
+
 def parse_arguments(arguments, stdout, stderr):
     args = parser.parse_args(arguments)
 
@@ -191,7 +200,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
 
     try:
         reader = arvados.CollectionReader(
-            col_loc, api_client=api_client, num_retries=args.retries)
+            col_loc, api_client=api_client, num_retries=args.retries,
+            keep_client=arvados.keep.KeepClient(block_cache=arvados.keep.KeepBlockCache((args.threads+1) * 64 * 1024 * 1024)),
+            get_threads=args.threads)
     except Exception as error:
         logger.error("failed to read collection: {}".format(error))
         return 1
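
Note how the block cache is sized to match the thread count: (threads + 1) x 64 MiB, so every in-flight block plus one extra can stay resident. For example:

    def cache_bytes(threads, block_size=64 * 1024 * 1024):
        # Mirrors the KeepBlockCache sizing in arv-get above.
        return (threads + 1) * block_size

    for n in (4, 8):
        print(n, 'threads ->', cache_bytes(n) // (1024 * 1024), 'MiB cache')
    # 4 threads -> 320 MiB cache
    # 8 threads -> 576 MiB cache
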
index 1a83eae944c59f8dde5e3a7c63de8bbe9c62a9c9..7c05cc0a6a2c72ca818686b6eea5c6f0a4874d3d 100644 (file)
@@ -1036,9 +1036,10 @@ class KeepClient(object):
         else:
             return None
 
-    def get_from_cache(self, loc):
+    def get_from_cache(self, loc_s):
         """Fetch a block only if is in the cache, otherwise return None."""
-        slot = self.block_cache.get(loc)
+        locator = KeepLocator(loc_s)
+        slot = self.block_cache.get(locator.md5sum)
         if slot is not None and slot.ready.is_set():
             return slot.get()
         else:
@@ -1057,7 +1058,7 @@ class KeepClient(object):
     def get(self, loc_s, **kwargs):
         return self._get_or_head(loc_s, method="GET", **kwargs)
 
-    def _get_or_head(self, loc_s, method="GET", num_retries=None, request_id=None, headers=None):
+    def _get_or_head(self, loc_s, method="GET", num_retries=None, request_id=None, headers=None, prefetch=False):
         """Get data from Keep.
 
         This method fetches one or more blocks of data from Keep.  It
@@ -1096,6 +1097,13 @@ class KeepClient(object):
             if method == "GET":
                 slot, first = self.block_cache.reserve_cache(locator.md5sum)
                 if not first:
+                    if prefetch:
+                        # this is a request for a prefetch; if the
+                        # block is already in flight, return immediately.
+                        # clear 'slot' to prevent the finally block
+                        # from calling slot.set()
+                        slot = None
+                        return None
                     self.hits_counter.add(1)
                     blob = slot.get()
                     if blob is None:
@@ -1332,6 +1340,3 @@ class KeepClient(object):
             return True
         if os.path.exists(os.path.join(self.local_store, locator.md5sum)):
             return True
-
-    def is_cached(self, locator):
-        return self.block_cache.reserve_cache(expect_hash)
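
reserve_cache returns the cache slot plus a flag saying whether this caller is the first requester for the block; a prefetch that loses that race returns immediately, while a foreground GET blocks on the slot until the winner fills it. A simplified sketch of that dedup idea (not the real KeepClient API):

    import threading

    class BlockCache:
        def __init__(self):
            self._slots = {}
            self._lock = threading.Lock()

        def reserve_cache(self, locator):
            """Return (slot, first): first is True if we created the slot."""
            with self._lock:
                if locator in self._slots:
                    return self._slots[locator], False
                slot = {'ready': threading.Event(), 'data': None}
                self._slots[locator] = slot
                return slot, True

    def get(cache, locator, fetch, prefetch=False):
        slot, first = cache.reserve_cache(locator)
        if not first:
            if prefetch:
                return None        # already in flight; the prefetch gives up
            slot['ready'].wait()   # a foreground read waits for the fetcher
            return slot['data']
        slot['data'] = fetch(locator)
        slot['ready'].set()
        return slot['data']

    cache = BlockCache()
    print(get(cache, 'acbd18db+3', lambda loc: b'foo'))                 # b'foo'
    print(get(cache, 'acbd18db+3', lambda loc: b'foo', prefetch=True))  # None
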
index be8a03fc314d2cf599c16d5f44b1ab61cc9e885d..c383d529e8087da579fcf4ae6814f76a57044e29 100644 (file)
@@ -392,7 +392,8 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
     pagesize = 1000
     kwargs["limit"] = pagesize
     kwargs["count"] = 'none'
-    kwargs["order"] = ["%s %s" % (order_key, "asc" if ascending else "desc"), "uuid asc"]
+    asc = "asc" if ascending else "desc"
+    kwargs["order"] = ["%s %s" % (order_key, asc), "uuid %s" % asc]
     other_filters = kwargs.get("filters", [])
 
     if "select" in kwargs and "uuid" not in kwargs["select"]:
@@ -436,7 +437,7 @@ def keyset_list_all(fn, order_key="created_at", num_retries=0, ascending=True, *
         if firstitem[order_key] == lastitem[order_key]:
             # Got a page where every item has the same order key.
             # Switch to using uuid for paging.
-            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">", lastitem["uuid"]]]
+            nextpage = [[order_key, "=", lastitem[order_key]], ["uuid", ">" if ascending else "<", lastitem["uuid"]]]
             prev_page_all_same_order_key = True
         else:
             # Start from the last order key seen, but skip the last
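
Both halves of this change serve keyset paging in descending order: the uuid tiebreaker has to sort in the same direction as the order key, and the same-order-key fallback has to page with "<" rather than ">". A compact sketch of the next-page filters under those rules:

    def next_page_filters(order_key, lastitem, ascending, same_key_page):
        """Build filters for the next keyset page, mirroring keyset_list_all."""
        if same_key_page:
            # Every item on the page shared one order key: page by uuid alone.
            return [[order_key, '=', lastitem[order_key]],
                    ['uuid', '>' if ascending else '<', lastitem['uuid']]]
        # Resume at the last order key seen, skipping the item itself.
        return [[order_key, '>=' if ascending else '<=', lastitem[order_key]],
                ['uuid', '!=', lastitem['uuid']]]

    print(next_page_filters('created_at', {'created_at': '1', 'uuid': '1'},
                            ascending=False, same_key_page=False))
    # [['created_at', '<=', '1'], ['uuid', '!=', '1']]
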
index 0b8e7c8f8bf2d4615209e0dbdbfd81e6e54b32af..b45a592ecd0fbd1b1d4722bd63f6e1e0b25514dd 100644 (file)
@@ -27,7 +27,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
         def __init__(self, blocks):
             self.blocks = blocks
             self.requests = []
-        def get(self, locator, num_retries=0):
+        def get(self, locator, num_retries=0, prefetch=False):
             self.requests.append(locator)
             return self.blocks.get(locator)
         def get_from_cache(self, locator):
@@ -627,6 +627,7 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
             def __init__(self, blocks, nocache):
                 self.blocks = blocks
                 self.nocache = nocache
+                self.num_get_threads = 1
 
             def block_prefetch(self, loc):
                 pass
index a43e0d40dfe7ed48f5477689d3623afefe952ba3..5cf4993b2f3804d22209ae16db41fc7bc505efd8 100644 (file)
@@ -320,7 +320,7 @@ class ArvadosCollectionsTest(run_test_server.TestCaseWithServers,
         def __init__(self, content, num_retries=0):
             self.content = content
 
-        def get(self, locator, num_retries=0):
+        def get(self, locator, num_retries=0, prefetch=False):
             return self.content[locator]
 
     def test_stream_reader(self):
index 1c0e437b41196bf1e56e2048e11029725b01da2e..4dba9ce3dc7a5105533a526fe3ee304ed60d784c 100644 (file)
@@ -166,10 +166,10 @@ class KeysetListAllTestCase(unittest.TestCase):
 
     def test_onepage_desc(self):
         ks = KeysetTestHelper([[
-            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid asc"], "filters": []},
+            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid desc"], "filters": []},
             {"items": [{"created_at": "2", "uuid": "2"}, {"created_at": "1", "uuid": "1"}]}
         ], [
-            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid asc"], "filters": [["created_at", "<=", "1"], ["uuid", "!=", "1"]]},
+            {"limit": 1000, "count": "none", "order": ["created_at desc", "uuid desc"], "filters": [["created_at", "<=", "1"], ["uuid", "!=", "1"]]},
             {"items": []}
         ]])
 
index 384ffd64b7080190c21a4ec46f2ab7ea31d97c83..7716a3d5cffd9f6ac44d3bb691d06c98ddf7acf2 100644 (file)
@@ -26,8 +26,8 @@ class Arvados::V1::LinksController < ApplicationController
   def get_permissions
     if current_user.andand.can?(manage: @object)
       # find all links and return them
-      @objects = Link.where(link_class: "permission",
-                            head_uuid: params[:uuid])
+      @objects = Link.unscoped.where(link_class: "permission",
+                                     head_uuid: params[:uuid])
       @offset = 0
       @limit = @objects.count
       render_list
@@ -39,14 +39,37 @@ class Arvados::V1::LinksController < ApplicationController
   protected
 
   def find_object_by_uuid
+    if params[:id] && params[:id].match(/\D/)
+      params[:uuid] = params.delete :id
+    end
     if action_name == 'get_permissions'
       # get_permissions accepts a UUID for any kind of object.
       @object = ArvadosModel::resource_class_for_uuid(params[:uuid])
         .readable_by(*@read_users)
         .where(uuid: params[:uuid])
         .first
-    else
+    elsif !current_user
       super
+    else
+      # The usual permission-filtering index query is unnecessarily
+      # inefficient, and doesn't match all permission links that
+      # should be visible (see #18865).  Instead, we look up the link
+      # by UUID, then check whether (a) its tail_uuid is the current
+      # user or (b) its head_uuid is an object the current_user
+      # can_manage.
+      link = Link.unscoped.where(uuid: params[:uuid]).first
+      if link && link.link_class != 'permission'
+        # Not a permission link. Re-fetch using generic
+        # permission-filtering query.
+        super
+      elsif link && (current_user.uuid == link.tail_uuid ||
+                     current_user.can?(manage: link.head_uuid))
+        # Permission granted.
+        @object = link
+      else
+        # Permission denied, i.e., link is invisible => 404.
+        @object = nil
+      end
     end
   end
 
@@ -86,6 +109,32 @@ class Arvados::V1::LinksController < ApplicationController
         k
       end
     end
+
+    # If the provided filters are enough to limit the results to
+    # permission links with specific head_uuids or
+    # tail_uuid=current_user, bypass the normal readable_by query
+    # (which doesn't match all can_manage-able items, see #18865) --
+    # just ensure the current user actually has can_manage permission
+    # for the provided head_uuids, removing any that don't. At that
+    # point the caller's filters are an effective permission filter.
+    if @filters.include?(['link_class', '=', 'permission'])
+      @filters.map do |k|
+        if k[0] == 'tail_uuid' && k[1] == '=' && k[2] == current_user.uuid
+          @objects = Link.unscoped
+        elsif k[0] == 'head_uuid'
+          if k[1] == '=' && current_user.can?(manage: k[2])
+            @objects = Link.unscoped
+          elsif k[1] == 'in'
+            # Modify the filter operand element (k[2]) in place,
+            # removing any non-permitted UUIDs.
+            k[2].select! do |head_uuid|
+              current_user.can?(manage: head_uuid)
+            end
+            @objects = Link.unscoped
+          end
+        end
+      end
+    end
   end
 
 end
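
The shortcut the comments above describe, restated: once the caller's filters pin the query to permission links, pruning the head_uuid operands down to objects the caller can_manage makes those filters a sufficient permission check, so the generic readable_by scope can be bypassed. A rough Python restatement of the pruning rule (can_manage stands in for the Rails permission check):

    def prune_head_uuid_filters(filters, can_manage):
        """Drop head_uuids the caller cannot manage; mirrors the Ruby select!."""
        if ['link_class', '=', 'permission'] not in filters:
            return filters
        for f in filters:
            if f[0] == 'head_uuid' and f[1] == 'in':
                f[2] = [u for u in f[2] if can_manage(u)]
        return filters

    filters = [['link_class', '=', 'permission'],
               ['head_uuid', 'in', ['uuid-managed', 'uuid-denied']]]
    print(prune_head_uuid_filters(filters, lambda u: u == 'uuid-managed'))
    # [['link_class', '=', 'permission'], ['head_uuid', 'in', ['uuid-managed']]]
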
index 993a49e5b75e7ecfb782a306df16c74b37fbed4a..52922d32b1868fdb53d8bcd3f1197d149a93bb63 100644 (file)
@@ -116,7 +116,7 @@ class ApiClientAuthorization < ArvadosModel
     clnt
   end
 
-  def self.check_anonymous_user_token token
+  def self.check_anonymous_user_token(token:, remote:)
     case token[0..2]
     when 'v2/'
       _, token_uuid, secret, optional = token.split('/')
@@ -130,11 +130,16 @@ class ApiClientAuthorization < ArvadosModel
       secret = token
     end
 
+    # Usually, the secret is salted with the remote cluster ID
+    salted_secret = OpenSSL::HMAC.hexdigest('sha1', Rails.configuration.Users.AnonymousUserToken, remote)
+
+    # The anonymous token could be specified as a full v2 token in the config,
+    # but the config loader strips it down to the secret part.
     # The anonymous token content and minimum length are verified in lib/config
-    if secret.length >= 0 && secret == Rails.configuration.Users.AnonymousUserToken
+    if secret.length >= 0 && (secret == Rails.configuration.Users.AnonymousUserToken || secret == salted_secret)
       return ApiClientAuthorization.new(user: User.find_by_uuid(anonymous_user_uuid),
                                         uuid: Rails.configuration.ClusterID+"-gj3su-anonymouspublic",
-                                        api_token: token,
+                                        api_token: secret,
                                         api_client: anonymous_user_token_api_client,
                                         scopes: ['GET /'])
     else
@@ -157,7 +162,7 @@ class ApiClientAuthorization < ArvadosModel
     return nil if token.nil? or token.empty?
     remote ||= Rails.configuration.ClusterID
 
-    auth = self.check_anonymous_user_token(token)
+    auth = self.check_anonymous_user_token(token: token, remote: remote)
     if !auth.nil?
       return auth
     end
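
The salting follows the scheme Arvados uses for other federated tokens: the stored secret is used as the HMAC-SHA1 key over the requesting cluster's ID, so the raw anonymous token never has to travel to a remote cluster. A Python equivalent of the salted_secret computation above (token and cluster ID are placeholders):

    import hashlib
    import hmac

    def salted_secret(anonymous_user_token, remote_cluster_id):
        # HMAC-SHA1 keyed with the configured token, over the remote
        # cluster ID, matching OpenSSL::HMAC.hexdigest('sha1', token, remote).
        return hmac.new(anonymous_user_token.encode(),
                        remote_cluster_id.encode(),
                        hashlib.sha1).hexdigest()

    print(salted_secret('examplesecrettoken', 'zzzzz'))
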
index 327bf63b5fa057779d6d03d99331b179077611db..07a31d81a8a129dc67acb4aa1a7fa8f39e253ea1 100644 (file)
@@ -220,7 +220,7 @@ class ArvadosModel < ApplicationRecord
   end
 
   def self.default_orders
-    ["#{table_name}.modified_at desc", "#{table_name}.uuid"]
+    ["#{table_name}.modified_at desc", "#{table_name}.uuid desc"]
   end
 
   def self.unique_columns
diff --git a/services/api/db/migrate/20220401153101_fix_created_at_indexes.rb b/services/api/db/migrate/20220401153101_fix_created_at_indexes.rb
new file mode 100644 (file)
index 0000000..590e841
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class FixCreatedAtIndexes < ActiveRecord::Migration[5.2]
+  @@idxtables = [:collections, :container_requests, :groups, :links, :repositories, :users, :virtual_machines, :workflows, :logs]
+
+  def up
+    @@idxtables.each do |table|
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_created_at")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_created_at_uuid")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_created_at_and_uuid")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at_uuid")
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at_and_uuid")
+
+      ActiveRecord::Base.connection.execute("CREATE INDEX IF NOT EXISTS index_#{table.to_s}_on_created_at_and_uuid ON #{table.to_s} USING btree (created_at, uuid)")
+      ActiveRecord::Base.connection.execute("CREATE INDEX IF NOT EXISTS index_#{table.to_s}_on_modified_at_and_uuid ON #{table.to_s} USING btree (modified_at, uuid)")
+    end
+  end
+
+  def down
+    @@idxtables.each do |table|
+      ActiveRecord::Base.connection.execute("DROP INDEX IF EXISTS index_#{table.to_s}_on_modified_at_and_uuid")
+      ActiveRecord::Base.connection.execute("CREATE INDEX IF NOT EXISTS index_#{table.to_s}_on_modified_at_uuid ON #{table.to_s} USING btree (modified_at desc, uuid asc)")
+    end
+  end
+end
index cfe21f7c9ae29307b42fd25236a1f4c195254da0..e6bba676257118d542333120a5815549d2cb1f64 100644 (file)
@@ -1905,10 +1905,10 @@ CREATE UNIQUE INDEX index_authorized_keys_on_uuid ON public.authorized_keys USIN
 
 
 --
--- Name: index_collections_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_collections_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_collections_on_created_at ON public.collections USING btree (created_at);
+CREATE INDEX index_collections_on_created_at_and_uuid ON public.collections USING btree (created_at, uuid);
 
 
 --
@@ -1933,17 +1933,10 @@ CREATE INDEX index_collections_on_is_trashed ON public.collections USING btree (
 
 
 --
--- Name: index_collections_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_collections_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_collections_on_modified_at ON public.collections USING btree (modified_at);
-
-
---
--- Name: index_collections_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_collections_on_modified_at_uuid ON public.collections USING btree (modified_at DESC, uuid);
+CREATE INDEX index_collections_on_modified_at_and_uuid ON public.collections USING btree (modified_at, uuid);
 
 
 --
@@ -1989,10 +1982,17 @@ CREATE INDEX index_container_requests_on_container_uuid ON public.container_requ
 
 
 --
--- Name: index_container_requests_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_container_requests_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_container_requests_on_created_at_and_uuid ON public.container_requests USING btree (created_at, uuid);
+
+
+--
+-- Name: index_container_requests_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_container_requests_on_modified_at_uuid ON public.container_requests USING btree (modified_at DESC, uuid);
+CREATE INDEX index_container_requests_on_modified_at_and_uuid ON public.container_requests USING btree (modified_at, uuid);
 
 
 --
@@ -2094,10 +2094,10 @@ CREATE UNIQUE INDEX index_frozen_groups_on_uuid ON public.frozen_groups USING bt
 
 
 --
--- Name: index_groups_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_groups_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_groups_on_created_at ON public.groups USING btree (created_at);
+CREATE INDEX index_groups_on_created_at_and_uuid ON public.groups USING btree (created_at, uuid);
 
 
 --
@@ -2122,17 +2122,10 @@ CREATE INDEX index_groups_on_is_trashed ON public.groups USING btree (is_trashed
 
 
 --
--- Name: index_groups_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_groups_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_groups_on_modified_at ON public.groups USING btree (modified_at);
-
-
---
--- Name: index_groups_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_groups_on_modified_at_uuid ON public.groups USING btree (modified_at DESC, uuid);
+CREATE INDEX index_groups_on_modified_at_and_uuid ON public.groups USING btree (modified_at, uuid);
 
 
 --
@@ -2360,10 +2353,10 @@ CREATE UNIQUE INDEX index_keep_services_on_uuid ON public.keep_services USING bt
 
 
 --
--- Name: index_links_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_links_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_links_on_created_at ON public.links USING btree (created_at);
+CREATE INDEX index_links_on_created_at_and_uuid ON public.links USING btree (created_at, uuid);
 
 
 --
@@ -2374,17 +2367,10 @@ CREATE INDEX index_links_on_head_uuid ON public.links USING btree (head_uuid);
 
 
 --
--- Name: index_links_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_links_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_links_on_modified_at ON public.links USING btree (modified_at);
-
-
---
--- Name: index_links_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_links_on_modified_at_uuid ON public.links USING btree (modified_at DESC, uuid);
+CREATE INDEX index_links_on_modified_at_and_uuid ON public.links USING btree (modified_at, uuid);
 
 
 --
@@ -2423,10 +2409,10 @@ CREATE UNIQUE INDEX index_links_on_uuid ON public.links USING btree (uuid);
 
 
 --
--- Name: index_logs_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_logs_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_logs_on_created_at ON public.logs USING btree (created_at);
+CREATE INDEX index_logs_on_created_at_and_uuid ON public.logs USING btree (created_at, uuid);
 
 
 --
@@ -2444,17 +2430,10 @@ CREATE INDEX index_logs_on_event_type ON public.logs USING btree (event_type);
 
 
 --
--- Name: index_logs_on_modified_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_logs_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_logs_on_modified_at ON public.logs USING btree (modified_at);
-
-
---
--- Name: index_logs_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_logs_on_modified_at_uuid ON public.logs USING btree (modified_at DESC, uuid);
+CREATE INDEX index_logs_on_modified_at_and_uuid ON public.logs USING btree (modified_at, uuid);
 
 
 --
@@ -2605,10 +2584,17 @@ CREATE UNIQUE INDEX index_pipeline_templates_on_uuid ON public.pipeline_template
 
 
 --
--- Name: index_repositories_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_repositories_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_repositories_on_modified_at_uuid ON public.repositories USING btree (modified_at DESC, uuid);
+CREATE INDEX index_repositories_on_created_at_and_uuid ON public.repositories USING btree (created_at, uuid);
+
+
+--
+-- Name: index_repositories_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_repositories_on_modified_at_and_uuid ON public.repositories USING btree (modified_at, uuid);
 
 
 --
@@ -2689,10 +2675,10 @@ CREATE UNIQUE INDEX index_trashed_groups_on_group_uuid ON public.trashed_groups
 
 
 --
--- Name: index_users_on_created_at; Type: INDEX; Schema: public; Owner: -
+-- Name: index_users_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_users_on_created_at ON public.users USING btree (created_at);
+CREATE INDEX index_users_on_created_at_and_uuid ON public.users USING btree (created_at, uuid);
 
 
 --
@@ -2703,17 +2689,10 @@ CREATE UNIQUE INDEX index_users_on_identity_url ON public.users USING btree (ide
 
 
 --
--- Name: index_users_on_modified_at; Type: INDEX; Schema: public; Owner: -
---
-
-CREATE INDEX index_users_on_modified_at ON public.users USING btree (modified_at);
-
-
---
--- Name: index_users_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_users_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_users_on_modified_at_uuid ON public.users USING btree (modified_at DESC, uuid);
+CREATE INDEX index_users_on_modified_at_and_uuid ON public.users USING btree (modified_at, uuid);
 
 
 --
@@ -2737,6 +2716,13 @@ CREATE UNIQUE INDEX index_users_on_username ON public.users USING btree (usernam
 CREATE UNIQUE INDEX index_users_on_uuid ON public.users USING btree (uuid);
 
 
+--
+-- Name: index_virtual_machines_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_virtual_machines_on_created_at_and_uuid ON public.virtual_machines USING btree (created_at, uuid);
+
+
 --
 -- Name: index_virtual_machines_on_hostname; Type: INDEX; Schema: public; Owner: -
 --
@@ -2745,10 +2731,10 @@ CREATE INDEX index_virtual_machines_on_hostname ON public.virtual_machines USING
 
 
 --
--- Name: index_virtual_machines_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_virtual_machines_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_virtual_machines_on_modified_at_uuid ON public.virtual_machines USING btree (modified_at DESC, uuid);
+CREATE INDEX index_virtual_machines_on_modified_at_and_uuid ON public.virtual_machines USING btree (modified_at, uuid);
 
 
 --
@@ -2766,10 +2752,17 @@ CREATE UNIQUE INDEX index_virtual_machines_on_uuid ON public.virtual_machines US
 
 
 --
--- Name: index_workflows_on_modified_at_uuid; Type: INDEX; Schema: public; Owner: -
+-- Name: index_workflows_on_created_at_and_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_workflows_on_created_at_and_uuid ON public.workflows USING btree (created_at, uuid);
+
+
+--
+-- Name: index_workflows_on_modified_at_and_uuid; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX index_workflows_on_modified_at_uuid ON public.workflows USING btree (modified_at DESC, uuid);
+CREATE INDEX index_workflows_on_modified_at_and_uuid ON public.workflows USING btree (modified_at, uuid);
 
 
 --
@@ -3185,6 +3178,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20211027154300'),
 ('20220224203102'),
 ('20220301155729'),
-('20220303204419');
+('20220303204419'),
+('20220401153101');
 
 
index ab76417902214162506707d3e642f93539ffe7ed..9280aeab935e19748f25ea347592d3d704161f02 100644 (file)
@@ -8,8 +8,8 @@ running:
   cancelled_at: ~
   cancelled_by_user_uuid: ~
   cancelled_by_client_uuid: ~
-  created_at: <%= 3.minute.ago.to_s(:db) %>
-  started_at: <%= 3.minute.ago.to_s(:db) %>
+  created_at: <%= 2.7.minute.ago.to_s(:db) %>
+  started_at: <%= 2.7.minute.ago.to_s(:db) %>
   finished_at: ~
   script: hash
   repository: active/foo
index 9621b3effc1c74f0b832c021b3c9d2b99ef11586..a504c9fadd790cb21e2410cb95cefd082c32cfbf 100644 (file)
@@ -97,7 +97,7 @@ has_job:
   state: Ready
   uuid: zzzzz-d1hrv-1yfj6xkidf2muk3
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 2.9.minute.ago.to_s(:db) %>
   components:
    foo:
     script: foo
@@ -112,7 +112,7 @@ components_is_jobspec:
   # Helps test that clients cope with funny-shaped components.
   # For an example, see #3321.
   uuid: zzzzz-d1hrv-1yfj61234abcdk4
-  created_at: <%= 2.minute.ago.to_s(:db) %>
+  created_at: <%= 4.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
@@ -132,7 +132,7 @@ pipeline_with_tagged_collection_input:
   state: Ready
   uuid: zzzzz-d1hrv-1yfj61234abcdk3
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 3.2.minute.ago.to_s(:db) %>
   components:
     part-one:
       script_parameters:
@@ -145,7 +145,7 @@ pipeline_to_merge_params:
   uuid: zzzzz-d1hrv-1yfj6dcba4321k3
   pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 3.3.minute.ago.to_s(:db) %>
   components:
     part-one:
       script_parameters:
@@ -193,7 +193,7 @@ pipeline_instance_owned_by_fuse:
   uuid: zzzzz-d1hrv-ri9dvgkgqs9y09j
   owner_uuid: zzzzz-tpzed-0fusedrivertest
   pipeline_template_uuid: zzzzz-p5p6p-vq4wuvy84xvaq2r
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-16 12:00:00
   name: "pipeline instance owned by FUSE"
   components:
     foo:
@@ -210,7 +210,7 @@ pipeline_instance_in_fuse_project:
   uuid: zzzzz-d1hrv-scarxiyajtshq3l
   owner_uuid: zzzzz-j7d0g-0000ownedbyfuse
   pipeline_template_uuid: zzzzz-p5p6p-vq4wuvy84xvaq2r
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-17 12:00:00
   name: "pipeline instance in FUSE project"
   components:
     foo:
@@ -227,7 +227,7 @@ pipeline_owned_by_active_in_aproject:
   state: Complete
   uuid: zzzzz-d1hrv-ju5ghi0i9z2kqc6
   owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-18 12:00:00
   components:
     foo:
       script: foo
@@ -243,7 +243,7 @@ pipeline_owned_by_active_in_home:
   state: Complete
   uuid: zzzzz-d1hrv-lihrbd0i9z2kqc6
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-19 12:00:00
   components:
     foo:
       script: foo
@@ -287,7 +287,7 @@ pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
   name: Pipeline in public project with other objects elsewhere
   pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
   state: Complete
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-20 12:00:00
   components:
     foo:
       script: foo
@@ -314,7 +314,7 @@ new_pipeline_in_publicly_accessible_project:
   name: Pipeline in New state in publicly accessible project
   pipeline_template_uuid: zzzzz-p5p6p-tmpltpublicproj
   state: New
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-21 12:00:00
   components:
     foo:
       script: foo
@@ -331,7 +331,7 @@ new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
   name: Pipeline in New state in public project with objects elsewhere
   pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
   state: New
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-22 12:00:00
   components:
     foo:
       script: foo
@@ -348,7 +348,7 @@ new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_object
   name: Pipeline in public project in New state with file type data class with objects elsewhere
   pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
   state: New
-  created_at: 2014-09-15 12:00:00
+  created_at: 2014-09-23 12:00:00
   components:
     foo:
       script: foo
@@ -363,8 +363,8 @@ pipeline_in_running_state:
   name: running_with_job
   uuid: zzzzz-d1hrv-runningpipeline
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  created_at: <%= 3.1.minute.ago.to_s(:db) %>
-  started_at: <%= 3.1.minute.ago.to_s(:db) %>
+  created_at: <%= 2.8.minute.ago.to_s(:db) %>
+  started_at: <%= 2.8.minute.ago.to_s(:db) %>
   state: RunningOnServer
   components:
    foo:
@@ -393,7 +393,7 @@ complete_pipeline_with_two_jobs:
   uuid: zzzzz-d1hrv-twodonepipeline
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   state: Complete
-  created_at: <%= 3.minute.ago.to_s(:db) %>
+  created_at: <%= 2.5.minute.ago.to_s(:db) %>
   started_at: <%= 2.minute.ago.to_s(:db) %>
   finished_at: <%= 1.minute.ago.to_s(:db) %>
   components:
index 9bba418578e89c6dd36009ba2e3278f700d33eb3..fae9dc40c6daf2c9df1e28edae47cd743757db52 100644 (file)
@@ -24,7 +24,7 @@ class Arvados::V1::QueryTest < ActionController::TestCase
       controller: 'logs',
     }
     assert_response :success
-    assert_equal('logs.event_type asc, logs.modified_at desc, logs.uuid',
+    assert_equal('logs.event_type asc, logs.modified_at desc, logs.uuid desc',
                  assigns(:objects).order_values.join(', '))
   end
 
@@ -36,7 +36,7 @@ class Arvados::V1::QueryTest < ActionController::TestCase
       controller: 'logs',
     }
     assert_response :success
-    assert_equal('logs.modified_at asc, logs.uuid',
+    assert_equal('logs.modified_at asc, logs.uuid desc',
                  assigns(:objects).order_values.join(', '))
   end
 
@@ -51,7 +51,7 @@ class Arvados::V1::QueryTest < ActionController::TestCase
       controller: 'logs',
     }
     assert_response :success
-    assert_equal('logs.modified_at asc, logs.event_type desc, logs.uuid',
+    assert_equal('logs.modified_at asc, logs.event_type desc, logs.uuid desc',
                  assigns(:objects).order_values.join(', '))
   end
 
index 9eae518c1d0f7ffd0700fc110de375754713fdc1..65f5adc1d150b0914324ebde3357187214c71bc2 100644 (file)
@@ -451,35 +451,226 @@ class PermissionsTest < ActionDispatch::IntegrationTest
 
     # Should be able to read links directly too
     get "/arvados/v1/links/#{can_read_uuid}",
-        params: {},
       headers: auth(:active)
     assert_response :success
 
+    ### Create some objects of different types (other than projects)
+    ### inside a subproject inside the shared project, and share those
+    ### individual objects with a 3rd user ("spectator").
+    post '/arvados/v1/groups',
+         params: {
+           group: {
+             owner_uuid: groups(:public).uuid,
+             name: 'permission test subproject',
+             group_class: 'project',
+           },
+         },
+         headers: auth(:admin)
+    assert_response :success
+    subproject_uuid = json_response['uuid']
+
+    test_types = ['collection', 'workflow', 'container_request']
+    test_type_create_attrs = {
+      'container_request' => {
+        command: ["echo", "foo"],
+        container_image: links(:docker_image_collection_tag).name,
+        cwd: "/tmp",
+        environment: {},
+        mounts: {"/out" => {kind: "tmp", capacity: 1000000}},
+        output_path: "/out",
+        runtime_constraints: {"vcpus" => 1, "ram" => 2},
+      },
+    }
+
+    test_object = {}
+    test_object_perm_link = {}
+    test_types.each do |test_type|
+      post "/arvados/v1/#{test_type}s",
+           params: {
+             test_type.to_sym => {
+               owner_uuid: subproject_uuid,
+               name: "permission test #{test_type} in subproject",
+             }.merge(test_type_create_attrs[test_type] || {}).to_json,
+           },
+           headers: auth(:admin)
+      assert_response :success
+      test_object[test_type] = json_response
+
+      post '/arvados/v1/links',
+           params: {
+             link: {
+               tail_uuid: users(:spectator).uuid,
+               link_class: 'permission',
+               name: 'can_read',
+               head_uuid: test_object[test_type]['uuid'],
+             }
+           },
+           headers: auth(:admin)
+      assert_response :success
+      test_object_perm_link[test_type] = json_response
+    end
+
+    # The "active-can_manage-project" permission should cause the
+    # "spectator-can_read-object" links to be visible to the "active"
+    # user.
+    test_types.each do |test_type|
+      get "/arvados/v1/permissions/#{test_object[test_type]['uuid']}",
+          headers: auth(:active)
+      assert_response :success
+      perm_uuids = json_response['items'].map { |item| item['uuid'] }
+      assert_includes perm_uuids, test_object_perm_link[test_type]['uuid'], "can_read_uuid not found"
+
+      get "/arvados/v1/links/#{test_object_perm_link[test_type]['uuid']}",
+          headers: auth(:active)
+      assert_response :success
+
+      [
+        ['head_uuid', '=', test_object[test_type]['uuid']],
+        ['head_uuid', 'in', [test_object[test_type]['uuid']]],
+        ['head_uuid', 'in', [users(:admin).uuid, test_object[test_type]['uuid']]],
+      ].each do |filter|
+        get "/arvados/v1/links",
+            params: {
+              filters: ([['link_class', '=', 'permission'], filter]).to_json,
+            },
+            headers: auth(:active)
+        assert_response :success
+        assert_not_empty json_response['items'], "could not find can_read link using index with filter #{filter}"
+        assert_equal test_object_perm_link[test_type]['uuid'], json_response['items'][0]['uuid']
+      end
+
+      # The "spectator-can_read-object" link should be visible to the
+      # subject user ("spectator") in a filter query, even without
+      # can_manage permission on the target object.
+      [
+        ['tail_uuid', '=', users(:spectator).uuid],
+      ].each do |filter|
+        get "/arvados/v1/links",
+            params: {
+              filters: ([['link_class', '=', 'permission'], filter]).to_json,
+            },
+            headers: auth(:spectator)
+        assert_response :success
+        perm_uuids = json_response['items'].map { |item| item['uuid'] }
+        assert_includes perm_uuids, test_object_perm_link[test_type]['uuid'], "could not find can_read link using index with filter #{filter}"
+      end
+    end
+
     ### Now delete the can_manage link
     delete "/arvados/v1/links/#{can_manage_uuid}",
-      params: nil,
       headers: auth(:active)
     assert_response :success
 
     # Should not be able read these permission links again
-    get "/arvados/v1/permissions/#{groups(:public).uuid}",
-      params: nil,
+    test_types.each do |test_type|
+      get "/arvados/v1/permissions/#{groups(:public).uuid}",
+          headers: auth(:active)
+      assert_response 404
+
+      get "/arvados/v1/permissions/#{test_object[test_type]['uuid']}",
+          headers: auth(:active)
+      assert_response 404
+
+      get "/arvados/v1/links",
+          params: {
+            filters: [["link_class", "=", "permission"], ["head_uuid", "=", groups(:public).uuid]].to_json
+          },
+          headers: auth(:active)
+      assert_response :success
+      assert_equal [], json_response['items']
+
+      [
+        ['head_uuid', '=', test_object[test_type]['uuid']],
+        ['head_uuid', 'in', [users(:admin).uuid, test_object[test_type]['uuid']]],
+        ['head_uuid', 'in', []],
+      ].each do |filter|
+        get "/arvados/v1/links",
+            params: {
+              :filters => [["link_class", "=", "permission"], filter].to_json
+            },
+            headers: auth(:active)
+        assert_response :success
+        assert_equal [], json_response['items']
+      end
+
+      # Should not be able to read links directly either
+      get "/arvados/v1/links/#{can_read_uuid}",
+          headers: auth(:active)
+      assert_response 404
+
+      test_types.each do |test_type|
+        get "/arvados/v1/links/#{test_object_perm_link[test_type]['uuid']}",
+            headers: auth(:active)
+        assert_response 404
+      end
+    end
+
+    ### Create a collection, and share it with a direct permission
+    ### link (as opposed to sharing its parent project)
+    post "/arvados/v1/collections",
+      params: {
+        collection: {
+          name: 'permission test',
+        }
+      },
+      headers: auth(:admin)
+    assert_response :success
+    collection_uuid = json_response['uuid']
+    post "/arvados/v1/links",
+      params: {
+        link: {
+          tail_uuid: users(:spectator).uuid,
+          link_class: 'permission',
+          name: 'can_read',
+          head_uuid: collection_uuid,
+          properties: {}
+        }
+      },
+      headers: auth(:admin)
+    assert_response :success
+    can_read_collection_uuid = json_response['uuid']
+
+    # Should not be able to read the permission link via permissions API,
+    # because permission is only can_read, not can_manage
+    get "/arvados/v1/permissions/#{collection_uuid}",
       headers: auth(:active)
     assert_response 404
 
-    get "/arvados/v1/links",
-        params: {
-          :filters => [["link_class", "=", "permission"], ["head_uuid", "=", groups(:public).uuid]].to_json
-        },
+    # Should not be able to read the permission link directly, for
+    # same reason
+    get "/arvados/v1/links/#{can_read_collection_uuid}",
       headers: auth(:active)
+    assert_response 404
+
+    ### Now add a can_manage link
+    post "/arvados/v1/links",
+      params: {
+        link: {
+          tail_uuid: users(:active).uuid,
+          link_class: 'permission',
+          name: 'can_manage',
+          head_uuid: collection_uuid,
+          properties: {}
+        }
+      },
+      headers: auth(:admin)
     assert_response :success
-    assert_equal [], json_response['items']
+    can_manage_collection_uuid = json_response['uuid']
 
-    # Should not be able to read links directly either
-    get "/arvados/v1/links/#{can_read_uuid}",
-        params: {},
+    # Should be able to read both permission links via permissions API
+    get "/arvados/v1/permissions/#{collection_uuid}",
       headers: auth(:active)
-    assert_response 404
+    assert_response :success
+    perm_uuids = json_response['items'].map { |item| item['uuid'] }
+    assert_includes perm_uuids, can_read_collection_uuid, "can_read_uuid not found"
+    assert_includes perm_uuids, can_manage_collection_uuid, "can_manage_uuid not found"
+
+    # Should be able to read both permission links directly
+    [can_read_collection_uuid, can_manage_collection_uuid].each do |uuid|
+      get "/arvados/v1/links/#{uuid}",
+        headers: auth(:active)
+      assert_response :success
+    end
   end
 
   test "get_permissions returns 404 for nonexistent uuid" do
index 2ee3b3cf94cab5c73f3b4e809db8b6afac7aec81..0548a767ba4cbba9b3b94d6303b65db6cb53c7b8 100644 (file)
@@ -62,7 +62,7 @@ class SelectTest < ActionDispatch::IntegrationTest
       headers: auth(:admin)
     assert_response :success
     uuids = json_response['items'].collect { |i| i['uuid'] }
-    assert_equal uuids, uuids.sort
+    assert_equal uuids, uuids.sort.reverse
   end
 
   def assert_link_classes_ascend(current_class, prev_class)
index 9b35769ef2f0886ab3e1595d280b0a9a133fb83e..aa649e9106c4a403c1d2745c6a2a805a87e5f205 100644 (file)
@@ -1126,7 +1126,8 @@ class ContainerRequestTest < ActiveSupport::TestCase
     end
   end
 
-  test "Having preemptible_instances=true create a committed child container request and verify the scheduling parameter of its container" do
+  test "AlwaysUsePreemptibleInstances makes child containers preemptible" do
+    Rails.configuration.Containers.AlwaysUsePreemptibleInstances = true
     common_attrs = {cwd: "test",
                     priority: 1,
                     command: ["echo", "hello"],
index 7de95a0cb1b0d95bd1d67dcc58b5a3c406a863ff..f3816c0d3e783b6272c5abcc424641a4bb39d6dc 100644 (file)
@@ -525,15 +525,23 @@ class CollectionDirectory(CollectionDirectoryBase):
                         self.collection.update()
                         new_collection_record = self.collection.api_response()
                     else:
+                        # If there are too many prefetch threads and the
+                        # CPU is maxed out, delivering data to the FUSE
+                        # layer actually ends up being slower.
+                        # Experimentally, capping at 7 threads seems to
+                        # be a sweet spot.
+                        get_threads = min(max((self.api.keep.block_cache.cache_max // (64 * 1024 * 1024)) - 1, 1), 7)
                         # Create a new collection object
                         if uuid_pattern.match(self.collection_locator):
                             coll_reader = arvados.collection.Collection(
                                 self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=get_threads)
                         else:
                             coll_reader = arvados.collection.CollectionReader(
                                 self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=get_threads)
                         new_collection_record = coll_reader.api_response() or {}
                         # If the Collection only exists in Keep, there will be no API
                         # response.  Fill in the fields we need.
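
The FUSE thread count is thus derived from the block cache size: one thread per 64 MiB cache slot, minus one, clamped to the range 1..7. With the Python SDK's usual 256 MiB default cache that works out to 3 prefetch threads:

    def fuse_get_threads(cache_max, slot=64 * 1024 * 1024):
        # min(max(slots - 1, 1), 7), as computed in CollectionDirectory above.
        return min(max(cache_max // slot - 1, 1), 7)

    for mib in (64, 256, 1024):
        print(mib, 'MiB cache ->', fuse_get_threads(mib * 1024 * 1024), 'threads')
    # 64 MiB -> 1, 256 MiB -> 3, 1024 MiB -> 7 threads
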
index ece316193d4ee6a82cf04f6a685f09b0af453cf3..1601db59440be8b57c35b988869a1a56229ef92b 100644 (file)
@@ -1088,6 +1088,7 @@ class FuseFsyncTest(FuseMagicTest):
 class MagicDirApiError(FuseMagicTest):
     def setUp(self):
         api = mock.MagicMock()
+        api.keep.block_cache = mock.MagicMock(cache_max=1)
         super(MagicDirApiError, self).setUp(api=api)
         api.collections().get().execute.side_effect = iter([
             Exception('API fail'),
index 97ec95e3aac3f96111ab49014635ae742073b4e8..ef61b06873c50661bb29f622bfb1b5e9a1097495 100644 (file)
@@ -913,6 +913,14 @@ func (h *handler) logUploadOrDownload(
                        WithField("collection_file_path", filepath)
                props["collection_uuid"] = collection.UUID
                props["collection_file_path"] = filepath
+               // h.determineCollection populates the collection_uuid prop with the PDH
+               // when the collection is being accessed via PDH. In that case, blank the
+               // collection_uuid field so that consumers of the log entries can rely on it
+               // being a UUID, or blank. The PDH remains available via the
+               // portable_data_hash property.
+               if props["collection_uuid"] == collection.PortableDataHash {
+                       props["collection_uuid"] = ""
+               }
        }
        if r.Method == "PUT" || r.Method == "POST" {
                log.Info("File upload")
diff --git a/services/keepproxy/.gitignore b/services/keepproxy/.gitignore
deleted file mode 100644 (file)
index a4c8ad9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-keepproxy
index c2760a2a4fb462e193f833ac97f59167871c7065..f857ed3e4ebf4859e1355260ce85bc52fa50de90 100644 (file)
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepproxy
 
 import (
        "context"
        "errors"
-       "flag"
        "fmt"
        "io"
        "io/ioutil"
        "net"
        "net/http"
-       "os"
-       "os/signal"
        "regexp"
        "strings"
-       "syscall"
        "time"
 
-       "git.arvados.org/arvados.git/lib/cmd"
-       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/lib/service"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/health"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
-       "github.com/coreos/go-systemd/daemon"
-       "github.com/ghodss/yaml"
        "github.com/gorilla/mux"
        lru "github.com/hashicorp/golang-lru"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
 )
 
-var version = "dev"
-
-var (
-       listener net.Listener
-       router   http.Handler
-)
-
 const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
 
-func configure(args []string) (*arvados.Cluster, logrus.FieldLogger, error) {
-       prog := args[0]
-       flags := flag.NewFlagSet(prog, flag.ContinueOnError)
-
-       dumpConfig := flags.Bool("dump-config", false, "write current configuration to stdout and exit")
-       getVersion := flags.Bool("version", false, "Print version information and exit.")
-
-       initLogger := logrus.New()
-       initLogger.Formatter = &logrus.JSONFormatter{
-               TimestampFormat: rfc3339NanoFixed,
-       }
-       var logger logrus.FieldLogger = initLogger
-
-       loader := config.NewLoader(os.Stdin, logger)
-       loader.SetupFlags(flags)
-       args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepproxy-config")
-
-       if ok, code := cmd.ParseFlags(flags, prog, args, "", os.Stderr); !ok {
-               os.Exit(code)
-       } else if *getVersion {
-               fmt.Printf("keepproxy %s\n", version)
-               return nil, logger, nil
-       }
-
-       cfg, err := loader.Load()
-       if err != nil {
-               return nil, logger, err
-       }
-       cluster, err := cfg.GetCluster("")
-       if err != nil {
-               return nil, logger, err
-       }
-
-       logger = ctxlog.New(os.Stderr, cluster.SystemLogs.Format, cluster.SystemLogs.LogLevel).WithFields(logrus.Fields{
-               "ClusterID": cluster.ClusterID,
-               "PID":       os.Getpid(),
-       })
-
-       if *dumpConfig {
-               out, err := yaml.Marshal(cfg)
-               if err != nil {
-                       return nil, logger, err
-               }
-               if _, err := os.Stdout.Write(out); err != nil {
-                       return nil, logger, err
-               }
-               return nil, logger, nil
-       }
+var Command = service.Command(arvados.ServiceNameKeepproxy, newHandlerOrErrorHandler)
 
-       return cluster, logger, nil
-}
-
-func main() {
-       cluster, logger, err := configure(os.Args)
-       if err != nil {
-               logger.Fatal(err)
-       }
-       if cluster == nil {
-               return
-       }
-
-       logger.Printf("keepproxy %s started", version)
-
-       if err := run(logger, cluster); err != nil {
-               logger.Fatal(err)
-       }
-
-       logger.Println("shutting down")
-}
-
-func run(logger logrus.FieldLogger, cluster *arvados.Cluster) error {
+func newHandlerOrErrorHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
        client, err := arvados.NewClientFromConfig(cluster)
        if err != nil {
-               return err
+               return service.ErrorHandler(ctx, cluster, fmt.Errorf("Error setting up arvados client: %w", err))
        }
-       client.AuthToken = cluster.SystemRootToken
-
        arv, err := arvadosclient.New(client)
        if err != nil {
-               return fmt.Errorf("Error setting up arvados client %v", err)
-       }
-
-       // If a config file is available, use the keepstores defined there
-       // instead of the legacy autodiscover mechanism via the API server
-       for k := range cluster.Services.Keepstore.InternalURLs {
-               arv.KeepServiceURIs = append(arv.KeepServiceURIs, strings.TrimRight(k.String(), "/"))
-       }
-
-       if cluster.SystemLogs.LogLevel == "debug" {
-               keepclient.DebugPrintf = logger.Printf
+               return service.ErrorHandler(ctx, cluster, fmt.Errorf("Error setting up arvados client: %w", err))
        }
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
-               return fmt.Errorf("Error setting up keep client %v", err)
+               return service.ErrorHandler(ctx, cluster, fmt.Errorf("Error setting up keep client: %w", err))
        }
        keepclient.RefreshServiceDiscoveryOnSIGHUP()
-
-       if cluster.Collections.DefaultReplication > 0 {
-               kc.Want_replicas = cluster.Collections.DefaultReplication
-       }
-
-       var listen arvados.URL
-       for listen = range cluster.Services.Keepproxy.InternalURLs {
-               break
-       }
-
-       var lErr error
-       listener, lErr = net.Listen("tcp", listen.Host)
-       if lErr != nil {
-               return fmt.Errorf("listen(%s): %v", listen.Host, lErr)
-       }
-
-       if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
-               logger.Printf("Error notifying init daemon: %v", err)
-       }
-       logger.Println("listening at", listener.Addr())
-
-       // Shut down the server gracefully (by closing the listener)
-       // if SIGTERM is received.
-       term := make(chan os.Signal, 1)
-       go func(sig <-chan os.Signal) {
-               s := <-sig
-               logger.Println("caught signal:", s)
-               listener.Close()
-       }(term)
-       signal.Notify(term, syscall.SIGTERM)
-       signal.Notify(term, syscall.SIGINT)
-
-       // Start serving requests.
-       router, err = MakeRESTRouter(kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster, logger)
+       router, err := newHandler(ctx, kc, time.Duration(keepclient.DefaultProxyRequestTimeout), cluster)
        if err != nil {
-               return err
-       }
-       server := http.Server{
-               Handler: httpserver.AddRequestIDs(httpserver.LogRequests(router)),
-               BaseContext: func(net.Listener) context.Context {
-                       return ctxlog.Context(context.Background(), logger)
-               },
+               return service.ErrorHandler(ctx, cluster, err)
        }
-       return server.Serve(listener)
+       return router
 }
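
The constructor above never aborts on a setup failure: each error is wrapped in service.ErrorHandler, so the process still starts and surfaces the problem on every request instead of crash-looping. A minimal sketch of that pattern, using only the standard library and hypothetical names (errorHandler, newProxy) rather than the actual lib/service implementation:

package example

import (
    "errors"
    "log"
    "net/http"
)

// errorHandler stands in for service.ErrorHandler: it answers every
// request with the construction error, keeping the failure visible to
// health probes and operators.
func errorHandler(err error) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        log.Printf("service unavailable: %v", err)
        http.Error(w, err.Error(), http.StatusInternalServerError)
    })
}

// newProxy mimics the shape of newHandlerOrErrorHandler above: setup
// errors become an always-failing handler rather than a fatal exit.
func newProxy(configured bool) http.Handler {
    if !configured {
        return errorHandler(errors.New("arvados client setup failed"))
    }
    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Write([]byte("ok\n"))
    })
}
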
 
-type TokenCacheEntry struct {
+type tokenCacheEntry struct {
        expire int64
        user   *arvados.User
 }
 
-type APITokenCache struct {
+type apiTokenCache struct {
        tokens     *lru.TwoQueueCache
        expireTime int64
 }
 
 // RememberToken caches the token and sets an expiry time. If the
 // token is already in the cache, it is not updated.
-func (cache *APITokenCache) RememberToken(token string, user *arvados.User) {
+func (cache *apiTokenCache) RememberToken(token string, user *arvados.User) {
        now := time.Now().Unix()
        _, ok := cache.tokens.Get(token)
        if !ok {
-               cache.tokens.Add(token, TokenCacheEntry{
+               cache.tokens.Add(token, tokenCacheEntry{
                        expire: now + cache.expireTime,
                        user:   user,
                })
@@ -211,13 +79,13 @@ func (cache *APITokenCache) RememberToken(token string, user *arvados.User) {
 
 // RecallToken checks if the cached token is known and still believed to be
 // valid.
-func (cache *APITokenCache) RecallToken(token string) (bool, *arvados.User) {
+func (cache *apiTokenCache) RecallToken(token string) (bool, *arvados.User) {
        val, ok := cache.tokens.Get(token)
        if !ok {
                return false, nil
        }
 
-       cacheEntry := val.(TokenCacheEntry)
+       cacheEntry := val.(tokenCacheEntry)
        now := time.Now().Unix()
        if now < cacheEntry.expire {
                // Token is known and still valid
@@ -229,18 +97,15 @@ func (cache *APITokenCache) RecallToken(token string) (bool, *arvados.User) {
        }
 }
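
RememberToken and RecallToken implement a trust-but-reverify cache: a token validated against the API server is trusted for a fixed window (300 seconds, per newHandler below) and re-validated afterwards. The real code keys a bounded lru.TwoQueueCache by "op:token"; the following is a stripped-down sketch of just the TTL logic, with a plain map standing in for the LRU:

package example

import (
    "sync"
    "time"
)

// ttlCache sketches apiTokenCache's expiry logic: entries keep their
// original deadline while present, and expired entries force a fresh
// permission check against the API server.
type ttlCache struct {
    mu      sync.Mutex
    ttl     time.Duration
    entries map[string]time.Time
}

func (c *ttlCache) remember(token string) {
    c.mu.Lock()
    defer c.mu.Unlock()
    if _, ok := c.entries[token]; !ok { // existing entries are not refreshed
        c.entries[token] = time.Now().Add(c.ttl)
    }
}

func (c *ttlCache) recall(token string) bool {
    c.mu.Lock()
    defer c.mu.Unlock()
    deadline, ok := c.entries[token]
    if !ok || time.Now().After(deadline) {
        delete(c.entries, token) // expired or unknown: re-validate
        return false
    }
    return true
}
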
 
-// GetRemoteAddress returns a string with the remote address for the request.
-// If the X-Forwarded-For header is set and has a non-zero length, it returns a
-// string made from a comma separated list of all the remote addresses,
-// starting with the one(s) from the X-Forwarded-For header.
-func GetRemoteAddress(req *http.Request) string {
-       if xff := req.Header.Get("X-Forwarded-For"); xff != "" {
-               return xff + "," + req.RemoteAddr
-       }
-       return req.RemoteAddr
+func (h *proxyHandler) Done() <-chan struct{} {
+       return nil
+}
+
+func (h *proxyHandler) CheckHealth() error {
+       return nil
 }
 
-func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, tok string, user *arvados.User) {
+func (h *proxyHandler) checkAuthorizationHeader(req *http.Request) (pass bool, tok string, user *arvados.User) {
        parts := strings.SplitN(req.Header.Get("Authorization"), " ", 2)
        if len(parts) < 2 || !(parts[0] == "OAuth2" || parts[0] == "Bearer") || len(parts[1]) == 0 {
                return false, "", nil
@@ -258,7 +123,7 @@ func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, t
                op = "write"
        }
 
-       if ok, user := h.APITokenCache.RecallToken(op + ":" + tok); ok {
+       if ok, user := h.apiTokenCache.RecallToken(op + ":" + tok); ok {
                // Valid in the cache, short circuit
                return true, tok, user
        }
@@ -282,7 +147,7 @@ func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, t
                }
        }
        if err != nil {
-               ctxlog.FromContext(req.Context()).Printf("%s: CheckAuthorizationHeader error: %v", GetRemoteAddress(req), err)
+               ctxlog.FromContext(req.Context()).WithError(err).Info("checkAuthorizationHeader error")
                return false, "", nil
        }
 
@@ -305,7 +170,7 @@ func (h *proxyHandler) CheckAuthorizationHeader(req *http.Request) (pass bool, t
        }
 
        // Success!  Update cache
-       h.APITokenCache.RememberToken(op+":"+tok, user)
+       h.apiTokenCache.RememberToken(op+":"+tok, user)
 
        return true, tok, user
 }
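
checkAuthorizationHeader accepts either the legacy "OAuth2 <token>" form or the standard "Bearer <token>" form, and caches the verdict under an "op:token" key so read and write permission checks do not shadow each other. A sketch of the header-parsing step in isolation:

package example

import (
    "net/http"
    "strings"
)

// bearerToken mirrors the first step of checkAuthorizationHeader:
// accept "OAuth2 <token>" or "Bearer <token>", and reject anything
// else before touching the cache or the API server.
func bearerToken(req *http.Request) (string, bool) {
    parts := strings.SplitN(req.Header.Get("Authorization"), " ", 2)
    if len(parts) < 2 || (parts[0] != "OAuth2" && parts[0] != "Bearer") || parts[1] == "" {
        return "", false
    }
    return parts[1], true
}
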
@@ -321,16 +186,13 @@ var defaultTransport = *(http.DefaultTransport.(*http.Transport))
 type proxyHandler struct {
        http.Handler
        *keepclient.KeepClient
-       *APITokenCache
+       *apiTokenCache
        timeout   time.Duration
        transport *http.Transport
-       logger    logrus.FieldLogger
        cluster   *arvados.Cluster
 }
 
-// MakeRESTRouter returns an http.Handler that passes GET and PUT
-// requests to the appropriate handlers.
-func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *arvados.Cluster, logger logrus.FieldLogger) (http.Handler, error) {
+func newHandler(ctx context.Context, kc *keepclient.KeepClient, timeout time.Duration, cluster *arvados.Cluster) (service.Handler, error) {
        rest := mux.NewRouter()
 
        transport := defaultTransport
@@ -352,11 +214,10 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *a
                KeepClient: kc,
                timeout:    timeout,
                transport:  &transport,
-               APITokenCache: &APITokenCache{
+               apiTokenCache: &apiTokenCache{
                        tokens:     cacheQ,
                        expireTime: 300,
                },
-               logger:  logger,
                cluster: cluster,
        }
 
@@ -380,7 +241,7 @@ func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, cluster *a
                Prefix: "/_health/",
        }).Methods("GET")
 
-       rest.NotFoundHandler = InvalidPathHandler{}
+       rest.NotFoundHandler = invalidPathHandler{}
        return h, nil
 }
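
newHandler keeps the gorilla/mux routing from MakeRESTRouter, including the custom NotFoundHandler that turns unroutable paths into 400s rather than mux's default 404. A self-contained sketch of that arrangement (the locator pattern and listen address here are illustrative, not the exact route set):

package main

import (
    "net/http"

    "github.com/gorilla/mux"
)

func main() {
    r := mux.NewRouter()
    // Path variables such as {locator} are recovered inside the
    // handler with mux.Vars(req), as the Get/Put handlers do.
    r.HandleFunc("/{locator:[0-9a-f]{32}.*}", func(w http.ResponseWriter, req *http.Request) {
        w.Write([]byte(mux.Vars(req)["locator"] + "\n"))
    }).Methods("GET", "HEAD")
    // Unroutable paths get 400 Bad Request, matching invalidPathHandler.
    r.NotFoundHandler = http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
        http.Error(w, "Bad request", http.StatusBadRequest)
    })
    http.ListenAndServe("127.0.0.1:25100", r)
}
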
 
@@ -388,30 +249,28 @@ var errLoopDetected = errors.New("loop detected")
 
 func (h *proxyHandler) checkLoop(resp http.ResponseWriter, req *http.Request) error {
        if via := req.Header.Get("Via"); strings.Index(via, " "+viaAlias) >= 0 {
-               h.logger.Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
+               ctxlog.FromContext(req.Context()).Printf("proxy loop detected (request has Via: %q): perhaps keepproxy is misidentified by gateway config as an external client, or its keep_services record does not have service_type=proxy?", via)
                http.Error(resp, errLoopDetected.Error(), http.StatusInternalServerError)
                return errLoopDetected
        }
        return nil
 }
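
The loop guard relies on the standard Via header: keepproxy stamps its alias into the Via headers it emits, so an incoming request that already carries that alias must have traversed this proxy once before (typically because a gateway misroutes it back). A sketch of the check:

package example

import (
    "errors"
    "net/http"
    "strings"
)

var errLoop = errors.New("loop detected")

// checkVia sketches checkLoop: seeing our own alias in an incoming
// Via header means the request has already passed through this proxy.
func checkVia(req *http.Request, alias string) error {
    if strings.Contains(req.Header.Get("Via"), " "+alias) {
        return errLoop
    }
    return nil
}
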
 
-func SetCorsHeaders(resp http.ResponseWriter) {
+func setCORSHeaders(resp http.ResponseWriter) {
        resp.Header().Set("Access-Control-Allow-Methods", "GET, HEAD, POST, PUT, OPTIONS")
        resp.Header().Set("Access-Control-Allow-Origin", "*")
        resp.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas")
        resp.Header().Set("Access-Control-Max-Age", "86486400")
 }
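
setCORSHeaders is applied to every response, so a browser preflight (OPTIONS) learns the allowed methods and may cache the answer for Access-Control-Max-Age seconds. A sketch of exercising a handler the way a preflight would, using the standard library's httptest:

package example

import (
    "net/http"
    "net/http/httptest"
    "strings"
)

// preflightAllowed drives a handler with an OPTIONS request and checks
// whether the requested method is advertised — roughly what a browser
// does before a cross-origin PUT.
func preflightAllowed(h http.Handler, method string) bool {
    req := httptest.NewRequest("OPTIONS", "/", nil)
    req.Header.Set("Access-Control-Request-Method", method)
    rr := httptest.NewRecorder()
    h.ServeHTTP(rr, req)
    return strings.Contains(rr.Header().Get("Access-Control-Allow-Methods"), method)
}
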
 
-type InvalidPathHandler struct{}
+type invalidPathHandler struct{}
 
-func (InvalidPathHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       ctxlog.FromContext(req.Context()).Printf("%s: %s %s unroutable", GetRemoteAddress(req), req.Method, req.URL.Path)
+func (invalidPathHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        http.Error(resp, "Bad request", http.StatusBadRequest)
 }
 
 func (h *proxyHandler) Options(resp http.ResponseWriter, req *http.Request) {
-       ctxlog.FromContext(req.Context()).Printf("%s: %s %s", GetRemoteAddress(req), req.Method, req.URL.Path)
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
 }
 
 var errBadAuthorizationHeader = errors.New("Missing or invalid Authorization header, or method not allowed")
@@ -424,7 +283,7 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        if err := h.checkLoop(resp, req); err != nil {
                return
        }
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
        resp.Header().Set("Via", req.Proto+" "+viaAlias)
 
        locator := mux.Vars(req)["locator"]
@@ -433,8 +292,15 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        var expectLength, responseLength int64
        var proxiedURI = "-"
 
+       logger := ctxlog.FromContext(req.Context())
        defer func() {
-               h.logger.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, responseLength, proxiedURI, err)
+               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+                       "locator":        locator,
+                       "expectLength":   expectLength,
+                       "responseLength": responseLength,
+                       "proxiedURI":     proxiedURI,
+                       "err":            err,
+               })
                if status != http.StatusOK {
                        http.Error(resp, err.Error(), status)
                }
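
Instead of hand-rolled Println calls, the handlers now attach fields to the request context with httpserver.SetResponseLogFields, and the LogRequests middleware emits one consolidated line per request. A minimal sketch of the idea (not the actual sdk/go/httpserver implementation): a mutable field map travels in the context and is flushed by the middleware.

package example

import (
    "context"
    "net/http"

    "github.com/sirupsen/logrus"
)

type fieldsKey struct{}

// withFields installs a mutable field map in the request context.
func withFields(req *http.Request) (*http.Request, logrus.Fields) {
    f := logrus.Fields{}
    return req.WithContext(context.WithValue(req.Context(), fieldsKey{}, f)), f
}

// setFields lets any handler contribute to the final access-log line.
func setFields(ctx context.Context, add logrus.Fields) {
    if f, ok := ctx.Value(fieldsKey{}).(logrus.Fields); ok {
        for k, v := range add {
            f[k] = v
        }
    }
}

// logRequests shows the middleware side: run the handler, then emit
// one line carrying everything the handlers recorded.
func logRequests(next http.Handler) http.Handler {
    return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
        req, f := withFields(req)
        next.ServeHTTP(w, req)
        logrus.WithFields(f).Info("request")
    })
}
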
@@ -445,10 +311,14 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        var pass bool
        var tok string
        var user *arvados.User
-       if pass, tok, user = h.CheckAuthorizationHeader(req); !pass {
+       if pass, tok, user = h.checkAuthorizationHeader(req); !pass {
                status, err = http.StatusForbidden, errBadAuthorizationHeader
                return
        }
+       httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+               "userUUID":     user.UUID,
+               "userFullName": user.FullName,
+       })
 
        // Copy ArvadosClient struct and use the client's API token
        arvclient := *kc.Arvados
@@ -459,18 +329,6 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
 
        locator = removeHint.ReplaceAllString(locator, "$1")
 
-       if locator != "" {
-               parts := strings.SplitN(locator, "+", 3)
-               if len(parts) >= 2 {
-                       logger := h.logger
-                       if user != nil {
-                               logger = logger.WithField("user_uuid", user.UUID).
-                                       WithField("user_full_name", user.FullName)
-                       }
-                       logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block download")
-               }
-       }
-
        switch req.Method {
        case "HEAD":
                expectLength, proxiedURI, err = kc.Ask(locator)
@@ -485,7 +343,7 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
        }
 
        if expectLength == -1 {
-               h.logger.Println("Warning:", GetRemoteAddress(req), req.Method, proxiedURI, "Content-Length not provided")
+               logger.Warn("Content-Length not provided")
        }
 
        switch respErr := err.(type) {
@@ -521,7 +379,7 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        if err := h.checkLoop(resp, req); err != nil {
                return
        }
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
        resp.Header().Set("Via", "HTTP/1.1 "+viaAlias)
 
        kc := h.makeKeepClient(req)
@@ -533,7 +391,13 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        var locatorOut string = "-"
 
        defer func() {
-               h.logger.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, kc.Want_replicas, wroteReplicas, locatorOut, err)
+               httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+                       "expectLength":  expectLength,
+                       "wantReplicas":  kc.Want_replicas,
+                       "wroteReplicas": wroteReplicas,
+                       "locator":       strings.SplitN(locatorOut, "+A", 2)[0],
+                       "err":           err,
+               })
                if status != http.StatusOK {
                        http.Error(resp, err.Error(), status)
                }
@@ -572,11 +436,15 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
        var pass bool
        var tok string
        var user *arvados.User
-       if pass, tok, user = h.CheckAuthorizationHeader(req); !pass {
+       if pass, tok, user = h.checkAuthorizationHeader(req); !pass {
                err = errBadAuthorizationHeader
                status = http.StatusForbidden
                return
        }
+       httpserver.SetResponseLogFields(req.Context(), logrus.Fields{
+               "userUUID":     user.UUID,
+               "userFullName": user.FullName,
+       })
 
        // Copy ArvadosClient struct and use the client's API token
        arvclient := *kc.Arvados
@@ -605,18 +473,6 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
                locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength)
        }
 
-       if locatorOut != "" {
-               parts := strings.SplitN(locatorOut, "+", 3)
-               if len(parts) >= 2 {
-                       logger := h.logger
-                       if user != nil {
-                               logger = logger.WithField("user_uuid", user.UUID).
-                                       WithField("user_full_name", user.FullName)
-                       }
-                       logger.WithField("locator", fmt.Sprintf("%s+%s", parts[0], parts[1])).Infof("Block upload")
-               }
-       }
-
        // Tell the client how many successful PUTs we accomplished
        resp.Header().Set(keepclient.XKeepReplicasStored, fmt.Sprintf("%d", wroteReplicas))
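
X-Keep-Replicas-Stored (keepclient.XKeepReplicasStored) is how the proxy reports partial success: a client that requested N copies via X-Keep-Desired-Replicas compares N against this count. A sketch of the client-side read:

package example

import (
    "net/http"
    "strconv"
)

// replicasStored reads the proxy's replica count after a PUT; 0 means
// the header was missing or malformed.
func replicasStored(resp *http.Response) int {
    n, err := strconv.Atoi(resp.Header.Get("X-Keep-Replicas-Stored"))
    if err != nil {
        return 0
    }
    return n
}
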
 
@@ -658,7 +514,7 @@ func (h *proxyHandler) Put(resp http.ResponseWriter, req *http.Request) {
 //   Aborts on any errors
 // Concatenates responses from all those keep servers and returns them.
 func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
-       SetCorsHeaders(resp)
+       setCORSHeaders(resp)
 
        prefix := mux.Vars(req)["prefix"]
        var err error
@@ -671,7 +527,7 @@ func (h *proxyHandler) Index(resp http.ResponseWriter, req *http.Request) {
        }()
 
        kc := h.makeKeepClient(req)
-       ok, token, _ := h.CheckAuthorizationHeader(req)
+       ok, token, _ := h.checkAuthorizationHeader(req)
        if !ok {
                status, err = http.StatusForbidden, errBadAuthorizationHeader
                return
diff --git a/services/keepproxy/keepproxy.service b/services/keepproxy/keepproxy.service
deleted file mode 100644 (file)
index 9548cb2..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-[Unit]
-Description=Arvados Keep Proxy
-Documentation=https://doc.arvados.org/
-After=network.target
-
-# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
-StartLimitIntervalSec=0
-
-[Service]
-Type=notify
-ExecStart=/usr/bin/keepproxy
-# Set a reasonable default for the open file limit
-LimitNOFILE=65536
-Restart=always
-RestartSec=1
-
-# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
-StartLimitInterval=0
-
-[Install]
-WantedBy=multi-user.target
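
The deleted unit used Type=notify, which paired with the SdNotify call removed from run(): systemd only marks the unit started once READY=1 arrives on the notify socket. Under the service.Command wrapper this responsibility presumably moves into the shared service machinery. For reference, a sketch of the readiness notification the old main loop performed, using github.com/coreos/go-systemd/daemon as the removed code did:

package example

import (
    "log"

    "github.com/coreos/go-systemd/daemon"
)

// notifyReady tells systemd the service is up. sent == false simply
// means we are not running under systemd (no NOTIFY_SOCKET), which
// the old code treated as non-fatal too.
func notifyReady() {
    sent, err := daemon.SdNotify(false, daemon.SdNotifyReady)
    if err != nil {
        log.Printf("error notifying init daemon: %v", err)
    } else if !sent {
        log.Print("not running under systemd; readiness notification skipped")
    }
}
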
index 052109bf29b925af9ef8945274ce099c96e9f112..8242f5b2b56868b23dfaecf4aefe814100e682ee 100644 (file)
@@ -2,14 +2,16 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepproxy
 
 import (
        "bytes"
+       "context"
        "crypto/md5"
        "fmt"
        "io/ioutil"
        "math/rand"
+       "net"
        "net/http"
        "net/http/httptest"
        "strings"
@@ -22,6 +24,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
        log "github.com/sirupsen/logrus"
 
@@ -54,27 +57,6 @@ type NoKeepServerSuite struct{}
 
 var TestProxyUUID = "zzzzz-bi6l4-lrixqc4fxofbmzz"
 
-// Wait (up to 1 second) for keepproxy to listen on a port. This
-// avoids a race condition where we hit a "connection refused" error
-// because we start testing the proxy too soon.
-func waitForListener() {
-       const (
-               ms = 5
-       )
-       for i := 0; listener == nil && i < 10000; i += ms {
-               time.Sleep(ms * time.Millisecond)
-       }
-       if listener == nil {
-               panic("Timed out waiting for listener to start")
-       }
-}
-
-func closeListener() {
-       if listener != nil {
-               listener.Close()
-       }
-}
-
 func (s *ServerRequiredSuite) SetUpSuite(c *C) {
        arvadostest.StartKeep(2, false)
 }
@@ -111,7 +93,12 @@ func (s *NoKeepServerSuite) SetUpTest(c *C) {
        arvadostest.ResetEnv()
 }
 
-func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool, kp *arvados.UploadDownloadRolePermissions) (*keepclient.KeepClient, *bytes.Buffer) {
+type testServer struct {
+       *httpserver.Server
+       proxyHandler *proxyHandler
+}
+
+func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool, kp *arvados.UploadDownloadRolePermissions) (*testServer, *keepclient.KeepClient, *bytes.Buffer) {
        cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
        c.Assert(err, Equals, nil)
        cluster, err := cfg.GetCluster("")
@@ -128,38 +115,47 @@ func runProxy(c *C, bogusClientToken bool, loadKeepstoresFromConfig bool, kp *ar
                cluster.Collections.KeepproxyPermission = *kp
        }
 
-       listener = nil
        logbuf := &bytes.Buffer{}
        logger := log.New()
        logger.Out = logbuf
-       go func() {
-               run(logger, cluster)
-               defer closeListener()
-       }()
-       waitForListener()
+       ctx := ctxlog.Context(context.Background(), logger)
+
+       handler := newHandlerOrErrorHandler(ctx, cluster, cluster.SystemRootToken, nil).(*proxyHandler)
+       srv := &testServer{
+               Server: &httpserver.Server{
+                       Server: http.Server{
+                               BaseContext: func(net.Listener) context.Context { return ctx },
+                               Handler: httpserver.AddRequestIDs(
+                                       httpserver.LogRequests(handler)),
+                       },
+                       Addr: ":",
+               },
+               proxyHandler: handler,
+       }
+       err = srv.Start()
+       c.Assert(err, IsNil)
 
        client := arvados.NewClientFromEnv()
        arv, err := arvadosclient.New(client)
-       c.Assert(err, Equals, nil)
+       c.Assert(err, IsNil)
        if bogusClientToken {
                arv.ApiToken = "bogus-token"
        }
        kc := keepclient.New(arv)
        sr := map[string]string{
-               TestProxyUUID: "http://" + listener.Addr().String(),
+               TestProxyUUID: "http://" + srv.Addr,
        }
        kc.SetServiceRoots(sr, sr, sr)
        kc.Arvados.External = true
-
-       return kc, logbuf
+       return srv, kc, logbuf
 }
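
Binding an ephemeral port (Addr ":") synchronously removes both the old waitForListener polling and the package-level listener global: by the time Start returns, the server is accepting connections. The same pattern with the standard library, for comparison:

package example

import (
    "net/http"
    "net/http/httptest"
)

// startTestProxy returns a server that is already listening, so tests
// can dial srv.URL immediately — no "connection refused" race.
func startTestProxy(h http.Handler) *httptest.Server {
    return httptest.NewServer(h)
}
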
 
 func (s *ServerRequiredSuite) TestResponseViaHeader(c *C) {
-       runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, _, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        req, err := http.NewRequest("POST",
-               "http://"+listener.Addr().String()+"/",
+               "http://"+srv.Addr+"/",
                strings.NewReader("TestViaHeader"))
        c.Assert(err, Equals, nil)
        req.Header.Add("Authorization", "OAuth2 "+arvadostest.ActiveToken)
@@ -172,7 +168,7 @@ func (s *ServerRequiredSuite) TestResponseViaHeader(c *C) {
        resp.Body.Close()
 
        req, err = http.NewRequest("GET",
-               "http://"+listener.Addr().String()+"/"+string(locator),
+               "http://"+srv.Addr+"/"+string(locator),
                nil)
        c.Assert(err, Equals, nil)
        resp, err = (&http.Client{}).Do(req)
@@ -182,13 +178,13 @@ func (s *ServerRequiredSuite) TestResponseViaHeader(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestLoopDetection(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        sr := map[string]string{
-               TestProxyUUID: "http://" + listener.Addr().String(),
+               TestProxyUUID: "http://" + srv.Addr,
        }
-       router.(*proxyHandler).KeepClient.SetServiceRoots(sr, sr, sr)
+       srv.proxyHandler.KeepClient.SetServiceRoots(sr, sr, sr)
 
        content := []byte("TestLoopDetection")
        _, _, err := kc.PutB(content)
@@ -200,8 +196,8 @@ func (s *ServerRequiredSuite) TestLoopDetection(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestStorageClassesHeader(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        // Set up fake keepstore to record request headers
        var hdr http.Header
@@ -216,7 +212,7 @@ func (s *ServerRequiredSuite) TestStorageClassesHeader(c *C) {
        sr := map[string]string{
                TestProxyUUID: ts.URL,
        }
-       router.(*proxyHandler).KeepClient.SetServiceRoots(sr, sr, sr)
+       srv.proxyHandler.KeepClient.SetServiceRoots(sr, sr, sr)
 
        // Set up client to ask for storage classes to keepproxy
        kc.StorageClasses = []string{"secure"}
@@ -227,15 +223,15 @@ func (s *ServerRequiredSuite) TestStorageClassesHeader(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestStorageClassesConfirmedHeader(c *C) {
-       runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, _, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        content := []byte("foo")
        hash := fmt.Sprintf("%x", md5.Sum(content))
        client := &http.Client{}
 
        req, err := http.NewRequest("PUT",
-               fmt.Sprintf("http://%s/%s", listener.Addr().String(), hash),
+               fmt.Sprintf("http://%s/%s", srv.Addr, hash),
                bytes.NewReader(content))
        c.Assert(err, IsNil)
        req.Header.Set("X-Keep-Storage-Classes", "default")
@@ -249,8 +245,8 @@ func (s *ServerRequiredSuite) TestStorageClassesConfirmedHeader(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestDesiredReplicas(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        content := []byte("TestDesiredReplicas")
        hash := fmt.Sprintf("%x", md5.Sum(content))
@@ -270,8 +266,8 @@ func (s *ServerRequiredSuite) TestDesiredReplicas(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        content := []byte("TestPutWrongContentLength")
        hash := fmt.Sprintf("%x", md5.Sum(content))
@@ -281,7 +277,7 @@ func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) {
        // fixes the invalid Content-Length header. In order to test
        // our server behavior, we have to call the handler directly
        // using an httptest.ResponseRecorder.
-       rtr, err := MakeRESTRouter(kc, 10*time.Second, &arvados.Cluster{}, log.New())
+       rtr, err := newHandler(context.Background(), kc, 10*time.Second, &arvados.Cluster{})
        c.Assert(err, check.IsNil)
 
        type testcase struct {
@@ -296,7 +292,7 @@ func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) {
                {"abcdef", http.StatusLengthRequired},
        } {
                req, err := http.NewRequest("PUT",
-                       fmt.Sprintf("http://%s/%s+%d", listener.Addr().String(), hash, len(content)),
+                       fmt.Sprintf("http://%s/%s+%d", srv.Addr, hash, len(content)),
                        bytes.NewReader(content))
                c.Assert(err, IsNil)
                req.Header.Set("Content-Length", t.sendLength)
@@ -310,9 +306,9 @@ func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) {
 }
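
As the comment in this test notes, Go's http.Client rewrites an invalid Content-Length, so the only way to exercise the server's handling of a bad value is to build the request by hand and call the handler directly. A sketch of that technique (the locator path here is illustrative):

package example

import (
    "net/http"
    "net/http/httptest"
    "strings"
)

// statusForContentLength drives a handler with a forged Content-Length
// header, bypassing http.Client's sanitization, and reports the status.
func statusForContentLength(h http.Handler, sendLength string) int {
    req := httptest.NewRequest("PUT",
        "/acbd18db4cc2f85cedef654fccc4a4d8+3", strings.NewReader("foo"))
    req.Header.Set("Content-Length", sendLength)
    rr := httptest.NewRecorder()
    h.ServeHTTP(rr, req)
    return rr.Code
}
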
 
 func (s *ServerRequiredSuite) TestManyFailedPuts(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
-       router.(*proxyHandler).timeout = time.Nanosecond
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
+       srv.proxyHandler.timeout = time.Nanosecond
 
        buf := make([]byte, 1<<20)
        rand.Read(buf)
@@ -337,8 +333,8 @@ func (s *ServerRequiredSuite) TestManyFailedPuts(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
-       kc, logbuf := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, logbuf := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
        var hash2 string
@@ -374,7 +370,7 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
                c.Check(err, Equals, nil)
                c.Log("Finished PutB (expected success)")
 
-               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block upload" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`)
+               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="TestCase Administrator".* userUUID=zzzzz-tpzed-d9tiejq69daie8f.*`)
                logbuf.Reset()
        }
 
@@ -383,7 +379,7 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
                c.Assert(err, Equals, nil)
                c.Check(blocklen, Equals, int64(3))
                c.Log("Finished Ask (expected success)")
-               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`)
+               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="TestCase Administrator".* userUUID=zzzzz-tpzed-d9tiejq69daie8f.*`)
                logbuf.Reset()
        }
 
@@ -395,7 +391,7 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
                c.Check(all, DeepEquals, []byte("foo"))
                c.Check(blocklen, Equals, int64(3))
                c.Log("Finished Get (expected success)")
-               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`)
+               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="TestCase Administrator".* userUUID=zzzzz-tpzed-d9tiejq69daie8f.*`)
                logbuf.Reset()
        }
 
@@ -421,8 +417,8 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) {
-       kc, _ := runProxy(c, true, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, true, false, nil)
+       defer srv.Close()
 
        hash := fmt.Sprintf("%x+3", md5.Sum([]byte("bar")))
 
@@ -455,8 +451,8 @@ func testPermission(c *C, admin bool, perm arvados.UploadDownloadPermission) {
                kp.User = perm
        }
 
-       kc, logbuf := runProxy(c, false, false, &kp)
-       defer closeListener()
+       srv, kc, logbuf := runProxy(c, false, false, &kp)
+       defer srv.Close()
        if admin {
                kc.Arvados.ApiToken = arvadostest.AdminToken
        } else {
@@ -477,10 +473,10 @@ func testPermission(c *C, admin bool, perm arvados.UploadDownloadPermission) {
                        c.Check(err, Equals, nil)
                        c.Log("Finished PutB (expected success)")
                        if admin {
-                               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block upload" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`)
+                               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="TestCase Administrator".* userUUID=zzzzz-tpzed-d9tiejq69daie8f.*`)
                        } else {
 
-                               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block upload" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="Active User" user_uuid=zzzzz-tpzed-xurymjxw79nv3jz.*`)
+                               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="Active User".* userUUID=zzzzz-tpzed-xurymjxw79nv3jz.*`)
                        }
                } else {
                        c.Check(hash2, Equals, "")
@@ -501,9 +497,9 @@ func testPermission(c *C, admin bool, perm arvados.UploadDownloadPermission) {
                        c.Check(blocklen, Equals, int64(3))
                        c.Log("Finished Get (expected success)")
                        if admin {
-                               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="TestCase Administrator" user_uuid=zzzzz-tpzed-d9tiejq69daie8f.*`)
+                               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="TestCase Administrator".* userUUID=zzzzz-tpzed-d9tiejq69daie8f.*`)
                        } else {
-                               c.Check(logbuf.String(), Matches, `(?ms).*msg="Block download" locator=acbd18db4cc2f85cedef654fccc4a4d8\+3 user_full_name="Active User" user_uuid=zzzzz-tpzed-xurymjxw79nv3jz.*`)
+                               c.Check(logbuf.String(), Matches, `(?ms).* locator=acbd18db4cc2f85cedef654fccc4a4d8\+3.* userFullName="Active User".* userUUID=zzzzz-tpzed-xurymjxw79nv3jz.*`)
                        }
                } else {
                        c.Check(err, FitsTypeOf, &keepclient.ErrNotFound{})
@@ -545,13 +541,13 @@ func (s *ServerRequiredSuite) TestPutGetPermission(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
-       runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, _, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        {
                client := http.Client{}
                req, err := http.NewRequest("OPTIONS",
-                       fmt.Sprintf("http://%s/%x+3", listener.Addr().String(), md5.Sum([]byte("foo"))),
+                       fmt.Sprintf("http://%s/%x+3", srv.Addr, md5.Sum([]byte("foo"))),
                        nil)
                c.Assert(err, IsNil)
                req.Header.Add("Access-Control-Request-Method", "PUT")
@@ -567,8 +563,7 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
        }
 
        {
-               resp, err := http.Get(
-                       fmt.Sprintf("http://%s/%x+3", listener.Addr().String(), md5.Sum([]byte("foo"))))
+               resp, err := http.Get(fmt.Sprintf("http://%s/%x+3", srv.Addr, md5.Sum([]byte("foo"))))
                c.Check(err, Equals, nil)
                c.Check(resp.Header.Get("Access-Control-Allow-Headers"), Equals, "Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas")
                c.Check(resp.Header.Get("Access-Control-Allow-Origin"), Equals, "*")
@@ -576,13 +571,13 @@ func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) {
-       runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, _, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        {
                client := http.Client{}
                req, err := http.NewRequest("POST",
-                       "http://"+listener.Addr().String()+"/",
+                       "http://"+srv.Addr+"/",
                        strings.NewReader("qux"))
                c.Check(err, IsNil)
                req.Header.Add("Authorization", "OAuth2 "+arvadostest.ActiveToken)
@@ -634,8 +629,8 @@ func (s *ServerRequiredConfigYmlSuite) TestGetIndex(c *C) {
 }
 
 func getIndexWorker(c *C, useConfig bool) {
-       kc, _ := runProxy(c, false, useConfig, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, useConfig, nil)
+       defer srv.Close()
 
        // Put "index-data" blocks
        data := []byte("index-data")
@@ -697,8 +692,8 @@ func getIndexWorker(c *C, useConfig bool) {
 }
 
 func (s *ServerRequiredSuite) TestCollectionSharingToken(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
        hash, _, err := kc.PutB([]byte("shareddata"))
        c.Check(err, IsNil)
        kc.Arvados.ApiToken = arvadostest.FooCollectionSharingToken
@@ -710,8 +705,8 @@ func (s *ServerRequiredSuite) TestCollectionSharingToken(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        // Put a test block
        hash, rep, err := kc.PutB([]byte("foo"))
@@ -747,14 +742,14 @@ func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        // Point keepproxy at a non-existent keepstore
        locals := map[string]string{
                TestProxyUUID: "http://localhost:12345",
        }
-       router.(*proxyHandler).KeepClient.SetServiceRoots(locals, nil, nil)
+       srv.proxyHandler.KeepClient.SetServiceRoots(locals, nil, nil)
 
        // Ask should result in temporary bad gateway error
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
@@ -773,8 +768,8 @@ func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
 }
 
 func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
        hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
        for _, f := range []func() error{
@@ -796,14 +791,14 @@ func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) {
 }
 
 func (s *ServerRequiredSuite) TestPing(c *C) {
-       kc, _ := runProxy(c, false, false, nil)
-       defer closeListener()
+       srv, kc, _ := runProxy(c, false, false, nil)
+       defer srv.Close()
 
-       rtr, err := MakeRESTRouter(kc, 10*time.Second, &arvados.Cluster{ManagementToken: arvadostest.ManagementToken}, log.New())
+       rtr, err := newHandler(context.Background(), kc, 10*time.Second, &arvados.Cluster{ManagementToken: arvadostest.ManagementToken})
        c.Assert(err, check.IsNil)
 
        req, err := http.NewRequest("GET",
-               "http://"+listener.Addr().String()+"/_health/ping",
+               "http://"+srv.Addr+"/_health/ping",
                nil)
        c.Assert(err, IsNil)
        req.Header.Set("Authorization", "Bearer "+arvadostest.ManagementToken)
index 705082118a82cbd568417cc41c42451145c5a64e..6ea427b9eb39840379b06c721e15d2be98050185 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package keepproxy
 
 import (
        "net/http"
index e021b442f1d7a0db09848c4364c0dcca45c7d0f0..e7416947d65d2abd5023f77f1b4de997b71c910d 100755 (executable)
@@ -237,7 +237,7 @@ run() {
         fi
 
         if ! (docker ps -a | grep -E "$ARVBOX_CONTAINER-data$" -q) ; then
-            docker create -v /var/lib/postgresql -v $ARVADOS_CONTAINER_PATH --name $ARVBOX_CONTAINER-data arvados/arvbox-demo /bin/true
+            docker create -v /var/lib/postgresql -v $ARVADOS_CONTAINER_PATH --name $ARVBOX_CONTAINER-data arvados/arvbox-demo$TAG /bin/true
         fi
 
         docker run \
index 0374c43e9c5360ab2d9f2a3720560b4af49536de..6e59209a3519f34a3db034aae2ccf36b31004b1d 100755 (executable)
@@ -4,17 +4,17 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 exec 2>&1
-sleep 2
 set -ex -o pipefail
 
 . /usr/local/lib/arvbox/common.sh
 . /usr/local/lib/arvbox/go-setup.sh
 
-flock /var/lib/gopath/gopath.lock go install "git.arvados.org/arvados.git/services/keepproxy"
-install $GOPATH/bin/keepproxy /usr/local/bin
+(cd /usr/local/bin && ln -sf arvados-server keepproxy)
 
 if test "$1" = "--only-deps" ; then
     exit
 fi
 
-exec /usr/local/bin/keepproxy
+/usr/local/lib/arvbox/runsu.sh flock $ARVADOS_CONTAINER_PATH/cluster_config.yml.lock /usr/local/lib/arvbox/cluster-config.sh
+
+exec /usr/local/lib/arvbox/runsu.sh /usr/local/bin/keepproxy
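
Keepproxy is no longer built as its own binary: the symlink makes the combined arvados-server executable select the subcommand from its own invocation name, busybox-style. A generic sketch of that dispatch (the real command table lives in the arvados-server source; the names below are illustrative):

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

// A multi-call binary picks its behavior from argv[0], so a symlink
// named "keepproxy" runs the keepproxy service from the same file.
func main() {
    switch prog := filepath.Base(os.Args[0]); prog {
    case "keepproxy":
        fmt.Println("would start the keepproxy service")
    default:
        fmt.Printf("unknown command %q\n", prog)
    }
}
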
index 1573b6862b3d50f849a54ae4f6a9ab9858754281..b08ff8ffae2abb04c146613d737ca26555b22710 100644 (file)
@@ -68,19 +68,19 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  
      # Networking
      # WEBUI PORT
-     arv.vm.network "forwarded_port", guest: 9443, host: 9443
+     arv.vm.network "forwarded_port", guest: 443, host: 9443
      # WORKBENCH1
-     arv.vm.network "forwarded_port", guest: 9444, host: 9444
+     arv.vm.network "forwarded_port", guest: 8805, host: 9444
      # WORKBENCH2
-     arv.vm.network "forwarded_port", guest: 9445, host: 9445
+     arv.vm.network "forwarded_port", guest: 443, host: 9445
      # KEEPPROXY
-     arv.vm.network "forwarded_port", guest: 35101, host: 35101
+     arv.vm.network "forwarded_port", guest: 8801, host: 35101
      # KEEPWEB
-     arv.vm.network "forwarded_port", guest: 11002, host: 11002
+     arv.vm.network "forwarded_port", guest: 8802, host: 11002
      # WEBSHELL
-     arv.vm.network "forwarded_port", guest: 14202, host: 14202
+     arv.vm.network "forwarded_port", guest: 8803, host: 14202
      # WEBSOCKET
-     arv.vm.network "forwarded_port", guest: 18002, host: 18002
+     arv.vm.network "forwarded_port", guest: 8804, host: 18002
      arv.vm.provision "shell",
                       inline: "cp -vr /vagrant/config_examples/single_host/single_hostname /home/vagrant/local_config_dir;
                                cp -vr /vagrant/tests /home/vagrant/tests;
index 8c14c56ed3a4336a6820c9359b4477a2c348ecd0..2e85be7905be1ea9eed09df0515e5416cae097d6 100644 (file)
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+# vim: ft=yaml
 ---
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
@@ -72,15 +74,29 @@ arvados:
       host: __DATABASE_INT_IP__
       password: "__DATABASE_PASSWORD__"
       user: __CLUSTER___arvados
-      encoding: en_US.utf8
-      client_encoding: UTF8
+      extra_conn_params:
+        client_encoding: UTF8
+      # CentOS 7 does not enable SSL by default, so we disable it
+      # here for formula-testing purposes only. Do not do this in
+      # production; configure the Postgres certificates correctly.
+      {%- if grains.os_family in ('RedHat',) %}
+        sslmode: disable
+      {%- endif %}
 
     tls:
       # certificate: ''
       # key: ''
-      # required to test with arvados-snakeoil certs
+      # When using arvados-snakeoil certs, set insecure: true
       insecure: false
 
+    resources:
+      virtual_machines:
+        shell:
+          name: shell
+          backend: __SHELL_INT_IP__
+          port: 4200
+
     ### TOKENS
     tokens:
       system_root: __SYSTEM_ROOT_TOKEN__
diff --git a/tools/salt-install/config_examples/multi_host/aws/pillars/arvados_development.sls b/tools/salt-install/config_examples/multi_host/aws/pillars/arvados_development.sls
new file mode 100644 (file)
index 0000000..21712d9
--- /dev/null
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+# vim: ft=yaml
+---
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# This config file is used to test a multi-node deployment using a local
+# dispatcher. This setup is not recommended for production use.
+
+# The variables commented out are the default values that the formula uses.
+# The uncommented values are REQUIRED values. If you don't set them, running
+# this formula will fail.
+arvados:
+  ### GENERAL CONFIG
+  version: '__VERSION__'
+  ## It makes little sense to disable this flag, but you can, if you want :)
+  # use_upstream_repo: true
+
+  ## Repo URL is built with grains values. If desired, it can be completely
+  ## overwritten with the pillar parameter 'repo_url'
+  # repo:
+  #   humanname: Arvados Official Repository
+
+  release: __RELEASE__
+
+  ## IMPORTANT!!!!!
+  ## api, workbench and shell require some gems, so you need to make sure ruby
+  ## and deps are installed in order to install and compile the gems.
+  ## We default to `false` in these two variables as it's expected you already
+  ## manage OS packages with some other tool and you don't want us messing up
+  ## your setup.
+  ruby:
+    ## We set these to `true` here for testing purposes.
+    ## They both default to `false`.
+    manage_ruby: true
+    manage_gems_deps: true
+    # pkg: ruby
+    # gems_deps:
+    #     - curl
+    #     - g++
+    #     - gcc
+    #     - git
+    #     - libcurl4
+    #     - libcurl4-gnutls-dev
+    #     - libpq-dev
+    #     - libxml2
+    #     - libxml2-dev
+    #     - make
+    #     - python3-dev
+    #     - ruby-dev
+    #     - zlib1g-dev
+
+  # config:
+  #   file: /etc/arvados/config.yml
+  #   user: root
+  ## IMPORTANT!!!!!
+  ## If you're installing any of the rails apps (api, workbench), the group
+  ## should be set to that of the web server, usually `www-data`
+  #   group: root
+  #   mode: 640
+
+  ### ARVADOS CLUSTER CONFIG
+  cluster:
+    name: __CLUSTER__
+    domain: __DOMAIN__
+
+    database:
+      # max concurrent connections per arvados server daemon
+      # connection_pool_max: 32
+      name: __CLUSTER___arvados
+      host: 127.0.0.1
+      password: "__DATABASE_PASSWORD__"
+      user: __CLUSTER___arvados
+      extra_conn_params:
+        client_encoding: UTF8
+      # CentOS 7 does not enable SSL by default, so we disable it
+      # here for formula-testing purposes only. Do not do this in
+      # production; configure the Postgres certificates correctly.
+      {%- if grains.os_family in ('RedHat',) %}
+        sslmode: disable
+      {%- endif %}
+
+    tls:
+      # certificate: ''
+      # key: ''
+      # When using arvados-snakeoil certs, set insecure: true
+      insecure: true
+
+    resources:
+      virtual_machines:
+        shell:
+          name: shell
+          backend: __SHELL_INT_IP__
+          port: 4200
+
+    ### TOKENS
+    tokens:
+      system_root: __SYSTEM_ROOT_TOKEN__
+      management: __MANAGEMENT_TOKEN__
+      anonymous_user: __ANONYMOUS_USER_TOKEN__
+
+    ### KEYS
+    secrets:
+      blob_signing_key: __BLOB_SIGNING_KEY__
+      workbench_secret_key: __WORKBENCH_SECRET_KEY__
+
+    Login:
+      Test:
+        Enable: true
+        Users:
+          __INITIAL_USER__:
+            Email: __INITIAL_USER_EMAIL__
+            Password: __INITIAL_USER_PASSWORD__
+
+    ### VOLUMES
+    ## This should usually match all your `keepstore` instances
+    Volumes:
+      # the volume name will be composed with
+      # <cluster>-nyw5e-<volume>
+      __CLUSTER__-nyw5e-000000000000000:
+        AccessViaHosts:
+          'http://__KEEPSTORE0_INT_IP__:25107':
+            ReadOnly: false
+        Replication: 2
+        Driver: Directory
+        DriverParameters:
+          Root: /tmp
+      __CLUSTER__-nyw5e-000000000000001:
+        AccessViaHosts:
+          'http://__KEEPSTORE1_INT_IP__:25107':
+            ReadOnly: false
+        Replication: 2
+        Driver: Directory
+        DriverParameters:
+          Root: /tmp
+
+    Users:
+      NewUsersAreActive: true
+      AutoAdminFirstUser: true
+      AutoSetupNewUsers: true
+      AutoSetupNewUsersWithRepository: true
+
+    Services:
+      Controller:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__'
+        InternalURLs:
+          'http://localhost:8003': {}
+      Keepbalance:
+        InternalURLs:
+          'http://__CONTROLLER_INT_IP__:9005': {}
+      Keepproxy:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__KEEP_EXT_SSL_PORT__'
+        InternalURLs:
+          'http://__KEEP_INT_IP__:25100': {}
+      Keepstore:
+        InternalURLs:
+          'http://__KEEPSTORE0_INT_IP__:25107': {}
+          'http://__KEEPSTORE1_INT_IP__:25107': {}
+      RailsAPI:
+        InternalURLs:
+          'http://localhost:8004': {}
+      WebDAV:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__KEEPWEB_EXT_SSL_PORT__'
+        InternalURLs:
+          'http://localhost:9002': {}
+      WebDAVDownload:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__KEEPWEB_EXT_SSL_PORT__'
+      WebShell:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__WEBSHELL_EXT_SSL_PORT__'
+      Websocket:
+        ExternalURL: 'wss://__CLUSTER__.__DOMAIN__:__WEBSOCKET_EXT_SSL_PORT__/websocket'
+        InternalURLs:
+          'http://__WEBSOCKET_INT_IP__:8005': {}
+      Workbench1:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__WORKBENCH1_EXT_SSL_PORT__'
+      Workbench2:
+        ExternalURL: 'https://__CLUSTER__.__DOMAIN__:__WORKBENCH2_EXT_SSL_PORT__'
diff --git a/tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls b/tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls
new file mode 100644 (file)
index 0000000..86c591e
--- /dev/null
@@ -0,0 +1,86 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# This state tries to query the controller using the parameters set in
+# the `arvados.cluster.resources.virtual_machines` pillar, to get the
+# ARVADOS_VIRTUAL_MACHINE_UUID for the host and configure the arvados login-sync cron
+# as described in https://doc.arvados.org/main/install/install-shell-server.html
+
+{%- set curr_tpldir = tpldir %}
+{%- set tpldir = 'arvados' %}
+{%- from "arvados/map.jinja" import arvados with context %}
+{%- from "arvados/libtofs.jinja" import files_switch with context %}
+{%- set tpldir = curr_tpldir %}
+
+{%- set virtual_machines = arvados.cluster.resources.virtual_machines | default({}) %}
+{%- set api_token = arvados.cluster.tokens.system_root | yaml_encode %}
+{%- set api_host = arvados.cluster.Services.Controller.ExternalURL | regex_replace('^http(s?)://', '', ignorecase=true) %}
+
+extra_shell_cron_add_login_sync_add_jq_pkg_installed:
+  pkg.installed:
+    - name: jq
+
+{%- for vm, vm_params in virtual_machines.items() %}
+  {%- set vm_name = vm_params.name | default(vm) %}
+
+  # Check if any of the specified virtual_machines parameters corresponds to this instance
+  # It should be an error if we get more than one occurrence
+  {%- if vm_name in [grains['id'], grains['host'], grains['fqdn'], grains['nodename']] or
+         vm_params.backend in [grains['id'], grains['host'], grains['fqdn'], grains['nodename']] +
+                               grains['ipv4'] + grains['ipv6'] %}
+
+    # We need to query the VM UUID
+    {%- set cmd_query_vm_uuid = 'arv --short virtual_machine list' ~
+                                ' --filters \'[["hostname", "=", "' ~ vm_name ~ '"]]\''
+    %}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_get_vm_uuid_cmd_run:
+  cmd.run:
+    - env:
+      - ARVADOS_API_TOKEN: {{ api_token }}
+      - ARVADOS_API_HOST: {{ api_host }}
+      - ARVADOS_API_HOST_INSECURE: {{ arvados.cluster.tls.insecure | default(false) }}
+    - name: {{ cmd_query_vm_uuid }} | head -1 | tee /tmp/vm_uuid_{{ vm }}
+    - unless:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+    - require:
+      - gem: arvados-shell-package-install-gem-arvados-cli-installed
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_api_host_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_API_HOST
+    - value: {{ api_host }}
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_api_token_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_API_TOKEN
+    - value: {{ api_token }}
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_api_host_insecure_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_API_HOST_INSECURE
+    - value: {{ arvados.cluster.tls.insecure | default(false) }}
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_virtual_machine_uuid_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_VIRTUAL_MACHINE_UUID
+    - value: __slot__:salt:cmd.run("cat /tmp/vm_uuid_{{ vm }}")
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_login_sync_cron_present:
+  cron.present:
+    - name: /usr/local/bin/arvados-login-sync
+    - minute: '*/2'
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+  {%- endif %}
+{%- endfor %}
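
Every cron step above is gated on the same grep, which recognizes an Arvados virtual_machine UUID: a five-character cluster prefix, the 2x53u type infix, then fifteen alphanumerics. The equivalent check in Go, for reference:

package example

import "regexp"

// vmUUID matches what the states grep for in /tmp/vm_uuid_<vm>:
// <cluster id>-2x53u-<15 alphanumerics>.
var vmUUID = regexp.MustCompile(`^[a-z0-9]{5}-2x53u-[a-z0-9]{15}$`)

func looksLikeVMUUID(s string) bool {
    return vmUUID.MatchString(s)
}
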
diff --git a/tools/salt-install/config_examples/multi_host/aws/states/shell_sudo_passwordless.sls b/tools/salt-install/config_examples/multi_host/aws/states/shell_sudo_passwordless.sls
new file mode 100644 (file)
index 0000000..dbcc9c9
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+{%- set curr_tpldir = tpldir %}
+{%- set tpldir = 'arvados' %}
+{%- from "arvados/map.jinja" import arvados with context %}
+{%- set tpldir = curr_tpldir %}
+
+extra_shell_sudo_passwordless_sudo_pkg_installed:
+  pkg.installed:
+    - name: sudo
+
+extra_shell_sudo_passwordless_config_file_managed:
+  file.managed:
+    - name: /etc/sudoers.d/arvados_passwordless
+    - makedirs: true
+    - user: root
+    - group: root
+    - mode: '0440'
+    - replace: false
+    - contents: |
+        # This file managed by Salt, do not edit by hand!!
+        # Allow members of group sudo to execute any command without password
+        %sudo ALL=(ALL:ALL) NOPASSWD:ALL
+    - require:
+      - pkg: extra_shell_sudo_passwordless_sudo_pkg_installed
index 2579c5ffb0be5b66ee8b892fb7d25b731b82b400..d11e61bba3683b3da20560a8900764276b870db7 100644 (file)
@@ -28,7 +28,6 @@ arvados:
   ## manage OS packages with some other tool and you don't want us messing up
  ## your setup.
   ruby:
-
     ## We set these to `true` here for testing purposes.
     ## They both default to `false`.
     manage_ruby: true
@@ -90,7 +89,7 @@ arvados:
       virtual_machines:
         shell:
           name: webshell
-          backend: 127.0.1.1
+          backend: 127.0.0.1
           port: 4200
 
     ### TOKENS
index 8e4d66caf5190ff9bc56df42ead5925f2674d438..21bb2c45a83e9d006117dd564ba1e9b916a6a080 100644 (file)
@@ -88,8 +88,8 @@ arvados:
     resources:
       virtual_machines:
         shell:
-          name: webshell
-          backend: 127.0.1.1
+          name: __HOSTNAME_EXT__
+          backend: 127.0.0.1
           port: 4200
 
     ### TOKENS
diff --git a/tools/salt-install/config_examples/single_host/single_hostname/states/shell_cron_add_login_sync.sls b/tools/salt-install/config_examples/single_host/single_hostname/states/shell_cron_add_login_sync.sls
new file mode 100644 (file)
index 0000000..86c591e
--- /dev/null
@@ -0,0 +1,86 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# This state tries to query the controller using the parameters set in
+# the `arvados.cluster.resources.virtual_machines` pillar, to get the
+# ARVADOS_VIRTUAL_MACHINE_UUID for the host and configure the arvados login-sync cron
+# as described in https://doc.arvados.org/main/install/install-shell-server.html
+
+{%- set curr_tpldir = tpldir %}
+{%- set tpldir = 'arvados' %}
+{%- from "arvados/map.jinja" import arvados with context %}
+{%- from "arvados/libtofs.jinja" import files_switch with context %}
+{%- set tpldir = curr_tpldir %}
+
+{%- set virtual_machines = arvados.cluster.resources.virtual_machines | default({}) %}
+{%- set api_token = arvados.cluster.tokens.system_root | yaml_encode %}
+{%- set api_host = arvados.cluster.Services.Controller.ExternalURL | regex_replace('^http(s?)://', '', ignorecase=true) %}
+
+extra_shell_cron_add_login_sync_add_jq_pkg_installed:
+  pkg.installed:
+    - name: jq
+
+{%- for vm, vm_params in virtual_machines.items() %}
+  {%- set vm_name = vm_params.name | default(vm) %}
+
+  # Check if any of the specified virtual_machines parameters corresponds to this instance
+  # It should be an error if we get more than one occurrence
+  {%- if vm_name in [grains['id'], grains['host'], grains['fqdn'], grains['nodename']] or
+         vm_params.backend in [grains['id'], grains['host'], grains['fqdn'], grains['nodename']] +
+                               grains['ipv4'] + grains['ipv6'] %}
+
+    # We need to query the VM UUID
+    {%- set cmd_query_vm_uuid = 'arv --short virtual_machine list' ~
+                                ' --filters \'[["hostname", "=", "' ~ vm_name ~ '"]]\''
+    %}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_get_vm_uuid_cmd_run:
+  cmd.run:
+    - env:
+      - ARVADOS_API_TOKEN: {{ api_token }}
+      - ARVADOS_API_HOST: {{ api_host }}
+      - ARVADOS_API_HOST_INSECURE: {{ arvados.cluster.tls.insecure | default(false) }}
+    - name: {{ cmd_query_vm_uuid }} | head -1 | tee /tmp/vm_uuid_{{ vm }}
+    - unless:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+    - require:
+      - gem: arvados-shell-package-install-gem-arvados-cli-installed
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_api_host_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_API_HOST
+    - value: {{ api_host }}
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_api_token_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_API_TOKEN
+    - value: {{ api_token }}
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_api_host_insecure_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_API_HOST_INSECURE
+    - value: {{ arvados.cluster.tls.insecure | default(false) }}
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_virtual_machine_uuid_cron_env_present:
+  cron.env_present:
+    - name: ARVADOS_VIRTUAL_MACHINE_UUID
+    - value: __slot__:salt:cmd.run("cat /tmp/vm_uuid_{{ vm }}")
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_login_sync_cron_present:
+  cron.present:
+    - name: /usr/local/bin/arvados-login-sync
+    - minute: '*/2'
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
+  {%- endif %}
+{%- endfor %}
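
For reference, the UUID lookup that this state performs can be reproduced by
hand; a minimal sketch, assuming a shell node named shell.example.com and
valid credentials in the environment (host and token values are hypothetical):

    export ARVADOS_API_HOST=controller.example.com
    export ARVADOS_API_TOKEN=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    arv --short virtual_machine list \
      --filters '[["hostname", "=", "shell.example.com"]]' | head -1
    # A match prints a UUID of the form [a-z0-9]{5}-2x53u-[a-z0-9]{15}
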
diff --git a/tools/salt-install/config_examples/single_host/single_hostname/states/shell_sudo_passwordless.sls b/tools/salt-install/config_examples/single_host/single_hostname/states/shell_sudo_passwordless.sls
new file mode 100644 (file)
index 0000000..dbcc9c9
--- /dev/null
@@ -0,0 +1,27 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+{%- set curr_tpldir = tpldir %}
+{%- set tpldir = 'arvados' %}
+{%- from "arvados/map.jinja" import arvados with context %}
+{%- set tpldir = curr_tpldir %}
+
+extra_shell_sudo_passwordless_sudo_pkg_installed:
+  pkg.installed:
+    - name: sudo
+
+extra_shell_sudo_passwordless_config_file_managed:
+  file.managed:
+    - name: /etc/sudoers.d/arvados_passwordless
+    - makedirs: true
+    - user: root
+    - group: root
+    - mode: '0440'
+    - replace: false
+    - contents: |
+        # This file is managed by Salt; do not edit by hand!
+        # Allow members of group sudo to execute any command without a password
+        %sudo ALL=(ALL:ALL) NOPASSWD:ALL
+    - require:
+      - pkg: extra_shell_sudo_passwordless_sudo_pkg_installed
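
Once the state above is applied, the drop-in can be sanity-checked; a short
sketch (the file path comes from the state, the user name is hypothetical):

    visudo -cf /etc/sudoers.d/arvados_passwordless   # parse check only
    sudo -l -U some_user                             # list the rules granted via group "sudo"
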
diff --git a/tools/salt-install/provision.sh b/tools/salt-install/provision.sh
index 0f3c9a14117b964907259d628afbb6de32f1e58d..44f3d4dffc0f3dbd4ba0e438f3a67f04bc499f99 100755 (executable)
 
 set -o pipefail
 
+_exit_handler() {
+  local rc="$?"
+  trap - EXIT
+  if [ "$rc" -ne 0 ]; then
+    echo "Error occurred ($rc) while running $0 at line $1 : $BASH_COMMAND"
+  fi
+  exit "$rc"
+}
+
+trap '_exit_handler $LINENO' EXIT ERR
+
 # capture the directory that the script is running from
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 
@@ -350,9 +361,9 @@ echo "...arvados"
 git clone --quiet https://git.arvados.org/arvados-formula.git ${F_DIR}/arvados
 
 # If we want to try a specific branch of the formula
-if [ "x${BRANCH}" != "x" ]; then
+if [ "x${BRANCH}" != "x" -a $(git rev-parse --abbrev-ref HEAD) != "${BRANCH}" ]; then
   ( cd ${F_DIR}/arvados && git checkout --quiet -t origin/"${BRANCH}" -b "${BRANCH}" )
-elif [ "x${ARVADOS_TAG}" != "x" ]; then
+elif [ "x${ARVADOS_TAG}" != "x" -a $(git rev-parse --abbrev-ref HEAD) != "${ARVADOS_TAG}" ]; then
 ( cd ${F_DIR}/arvados && git checkout --quiet tags/"${ARVADOS_TAG}" -b "${ARVADOS_TAG}" )
 fi
 
@@ -417,30 +428,34 @@ for f in $(ls "${SOURCE_PILLARS_DIR}"/*); do
   "${f}" > "${P_DIR}"/$(basename "${f}")
 done
 
-if [ "x${TEST}" = "xyes" ] && [ ! -d "${SOURCE_TESTS_DIR}" ]; then
-  echo "You requested to run tests, but ${SOURCE_TESTS_DIR} does not exist or is not a directory. Exiting."
-  exit 1
-fi
-mkdir -p ${T_DIR}
-# Replace cluster and domain name in the test files
-for f in $(ls "${SOURCE_TESTS_DIR}"/*); do
-  FILTERS="s#__CLUSTER__#${CLUSTER}#g;
-       s#__CONTROLLER_EXT_SSL_PORT__#${CONTROLLER_EXT_SSL_PORT}#g;
-       s#__DOMAIN__#${DOMAIN}#g;
-       s#__IP_INT__#${IP_INT}#g;
-       s#__INITIAL_USER_EMAIL__#${INITIAL_USER_EMAIL}#g;
-       s#__INITIAL_USER_PASSWORD__#${INITIAL_USER_PASSWORD}#g
-       s#__INITIAL_USER__#${INITIAL_USER}#g;
-       s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g;
-       s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g"
-  if [ "$USE_SINGLE_HOSTNAME" = "yes" ]; then
-    FILTERS="s#__CLUSTER__.__DOMAIN__#${HOSTNAME_EXT}#g;
-       $FILTERS"
+if [ ! -d "${SOURCE_TESTS_DIR}" ]; then
+  echo "WARNING: The tests directory was not copied to \"${SOURCE_TESTS_DIR}\"."
+  if [ "x${TEST}" = "xyes" ]; then
+    echo "WARNING: Disabling tests for this installation."
   fi
-  sed "$FILTERS" \
-    "${f}" > ${T_DIR}/$(basename "${f}")
-done
-chmod 755 ${T_DIR}/run-test.sh
+  TEST="no"
+else
+  mkdir -p ${T_DIR}
+  # Replace cluster and domain name in the test files
+  for f in $(ls "${SOURCE_TESTS_DIR}"/*); do
+    FILTERS="s#__CLUSTER__#${CLUSTER}#g;
+         s#__CONTROLLER_EXT_SSL_PORT__#${CONTROLLER_EXT_SSL_PORT}#g;
+         s#__DOMAIN__#${DOMAIN}#g;
+         s#__IP_INT__#${IP_INT}#g;
+         s#__INITIAL_USER_EMAIL__#${INITIAL_USER_EMAIL}#g;
+         s#__INITIAL_USER_PASSWORD__#${INITIAL_USER_PASSWORD}#g
+         s#__INITIAL_USER__#${INITIAL_USER}#g;
+         s#__DATABASE_PASSWORD__#${DATABASE_PASSWORD}#g;
+         s#__SYSTEM_ROOT_TOKEN__#${SYSTEM_ROOT_TOKEN}#g"
+    if [ "$USE_SINGLE_HOSTNAME" = "yes" ]; then
+      FILTERS="s#__CLUSTER__.__DOMAIN__#${HOSTNAME_EXT}#g;
+         $FILTERS"
+    fi
+    sed "$FILTERS" \
+      "${f}" > ${T_DIR}/$(basename "${f}")
+  done
+  chmod 755 ${T_DIR}/run-test.sh
+fi
 
 # Replace helper state files that differ from the formula's examples
 if [ -d "${SOURCE_STATES_DIR}" ]; then
@@ -514,7 +529,7 @@ if [ -d "${F_DIR}"/extra/extra ]; then
     # Same when using self-signed certificates.
     SKIP_SNAKE_OIL="dont_add_snakeoil_certs"
   fi
-  for f in $(ls "${F_DIR}"/extra/extra/*.sls | grep -v ${SKIP_SNAKE_OIL}); do
+  for f in $(ls "${F_DIR}"/extra/extra/*.sls | grep -Ev "${SKIP_SNAKE_OIL}|shell_"); do
   echo "    - extra.$(basename ${f} | sed 's/.sls$//g')" >> ${S_DIR}/top.sls
   done
   # Use byo or self-signed certificates
@@ -547,6 +562,8 @@ if [ -z "${ROLES}" ]; then
   echo "    - postgres" >> ${S_DIR}/top.sls
   echo "    - docker.software" >> ${S_DIR}/top.sls
   echo "    - arvados" >> ${S_DIR}/top.sls
+  echo "    - extra.shell_sudo_passwordless" >> ${S_DIR}/top.sls
+  echo "    - extra.shell_cron_add_login_sync" >> ${S_DIR}/top.sls
 
   # Pillars
   echo "    - docker" >> ${P_DIR}/top.sls
@@ -665,6 +682,7 @@ else
         sed -i "s/__NGINX_INSTALL_SOURCE__/${NGINX_INSTALL_SOURCE}/g" ${P_DIR}/nginx_passenger.sls
       ;;
       "controller" | "websocket" | "workbench" | "workbench2" | "webshell" | "keepweb" | "keepproxy")
+        NGINX_INSTALL_SOURCE="install_from_repo"
         # States
         if [ "${R}" = "workbench" ]; then
           NGINX_INSTALL_SOURCE="install_from_phusionpassenger"
@@ -745,7 +763,7 @@ else
                     s#__CERT_PEM__#/etc/nginx/ssl/arvados-${R}.pem#g;
                     s#__CERT_KEY__#/etc/nginx/ssl/arvados-${R}.key#g" \
             ${P_DIR}/nginx_${R}_configuration.sls
-            grep -q ${R} ${P_DIR}/extra_custom_certs.sls || echo "  - ${R}" >> ${P_DIR}/extra_custom_certs.sls
+            grep -q ${R}$ ${P_DIR}/extra_custom_certs.sls || echo "  - ${R}" >> ${P_DIR}/extra_custom_certs.sls
           fi
         fi
         # We need to tweak the Nginx's pillar depending whether we want plain nginx or nginx+passenger
@@ -753,6 +771,8 @@ else
       ;;
       "shell")
         # States
+        echo "    - extra.shell_sudo_passwordless" >> ${S_DIR}/top.sls
+        echo "    - extra.shell_cron_add_login_sync" >> ${S_DIR}/top.sls
         grep -q "docker" ${S_DIR}/top.sls       || echo "    - docker.software" >> ${S_DIR}/top.sls
         grep -q "arvados.${R}" ${S_DIR}/top.sls || echo "    - arvados.${R}" >> ${S_DIR}/top.sls
         # Pillars
@@ -788,15 +808,17 @@ fi
 
 # Leave a copy of the Arvados CA so the user can copy it where it's required
 if [ "$DEV_MODE" = "yes" ]; then
-  echo "Copying the Arvados CA certificate to the installer dir, so you can import it"
+  ARVADOS_SNAKEOIL_CA_DEST_FILE="${SCRIPT_DIR}/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem"
+
   # If running in a vagrant VM, also add default user to docker group
   if [ "x${VAGRANT}" = "xyes" ]; then
-    cp /etc/ssl/certs/arvados-snakeoil-ca.pem /vagrant/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem
-
     echo "Adding the vagrant user to the docker group"
     usermod -a -G docker vagrant
-  else
-    cp /etc/ssl/certs/arvados-snakeoil-ca.pem ${SCRIPT_DIR}/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem
+    ARVADOS_SNAKEOIL_CA_DEST_FILE="/vagrant/${CLUSTER}.${DOMAIN}-arvados-snakeoil-ca.pem"
+  fi
+  if [ -f /etc/ssl/certs/arvados-snakeoil-ca.pem ]; then
+    echo "Copying the Arvados CA certificate to the installer dir, so you can import it"
+    cp /etc/ssl/certs/arvados-snakeoil-ca.pem ${ARVADOS_SNAKEOIL_CA_DEST_FILE}
   fi
 fi
 
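
The placeholder substitution used for the pillar and test files above can be
illustrated in isolation; a sketch with hypothetical values:

    # A template line such as
    #   host: __CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__
    # is rewritten by the sed filters like so:
    echo 'host: __CLUSTER__.__DOMAIN__:__CONTROLLER_EXT_SSL_PORT__' \
      | sed 's#__CLUSTER__#xarv1#g; s#__DOMAIN__#example.com#g; s#__CONTROLLER_EXT_SSL_PORT__#8443#g'
    # Output: host: xarv1.example.com:8443
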
diff --git a/tools/salt-install/tests/run-test.sh b/tools/salt-install/tests/run-test.sh
index cf43273a14d584b390079400b096f12ec1e2d683..42ab716642739dee5d0d9a76b51f92d24146be7c 100755 (executable)
@@ -9,14 +9,6 @@ export ARVADOS_API_HOST_INSECURE=true
 
 set -o pipefail
 
-# First, validate that the CA is installed and that we can query it with no errors.
-if ! curl -s -o /dev/null https://${ARVADOS_API_HOST}/users/welcome?return_to=%2F; then
-  echo "The Arvados CA was not correctly installed. Although some components will work,"
-  echo "others won't. Please verify that the CA cert file was installed correctly and"
-  echo "retry running these tests."
-  exit 1
-fi
-
 # https://doc.arvados.org/v2.0/install/install-jobs-image.html
 echo "Creating Arvados Standard Docker Images project"
 uuid_prefix=$(arv --format=uuid user current | cut -d- -f1)
diff --git a/tools/user-activity/arvados_user_activity/main.py b/tools/user-activity/arvados_user_activity/main.py
index 997da57e052db81a25306507b23b3f60935b129e..26a4f28067663898ac660e647d91ba5fa71dbfb3 100755 (executable)
@@ -10,12 +10,52 @@ import arvados
 import arvados.util
 import datetime
 import ciso8601
+import csv
 
 def parse_arguments(arguments):
     arg_parser = argparse.ArgumentParser()
-    arg_parser.add_argument('--days', type=int, required=True)
+    arg_parser.add_argument('--start', help='Start date for the report in YYYY-MM-DD format (UTC)')
+    arg_parser.add_argument('--end', help='End date for the report in YYYY-MM-DD format (UTC)')
+    arg_parser.add_argument('--days', type=int, help='Number of days before now() to start the report')
+    arg_parser.add_argument('--csv', action='store_true', help='Output in csv format (default: false)')
     args = arg_parser.parse_args(arguments)
-    return args
+
+    # --days is mutually exclusive with --start/--end, which must be given together.
+    if args.days and (args.start or args.end):
+        arg_parser.print_help()
+        print("\nError: specify either --days or both --start and --end, not both")
+        exit(1)
+
+    if not args.days and (not args.start or not args.end):
+        arg_parser.print_help()
+        print("\nError: either specify --days or both --start and --end")
+        exit(1)
+
+    if args.days:
+        to = datetime.datetime.utcnow()
+        since = to - datetime.timedelta(days=args.days)
+
+    if args.start:
+        try:
+            since = datetime.datetime.strptime(args.start, "%Y-%m-%d")
+        except ValueError:
+            arg_parser.print_help()
+            print("\nError: start date must be in YYYY-MM-DD format")
+            exit(1)
+
+    if args.end:
+        try:
+            to = datetime.datetime.strptime(args.end, "%Y-%m-%d")
+        except ValueError:
+            arg_parser.print_help()
+            print("\nError: end date must be in YYYY-MM-DD format")
+            exit(1)
+
+    return args, since, to
 
 def getowner(arv, uuid, owners):
     if uuid is None:
@@ -33,20 +73,46 @@ def getowner(arv, uuid, owners):
     return getowner(arv, owners[uuid], owners)
 
 def getuserinfo(arv, uuid):
-    u = arv.users().get(uuid=uuid).execute()
+    try:
+        u = arv.users().get(uuid=uuid).execute()
+    except Exception:
+        return "deleted user (%susers/%s)" % (arv.config()["Services"]["Workbench1"]["ExternalURL"],
+                                              uuid)
     prof = "\n".join("  %s: \"%s\"" % (k, v) for k, v in u["prefs"].get("profile", {}).items() if v)
     if prof:
         prof = "\n"+prof+"\n"
     return "%s %s <%s> (%susers/%s)%s" % (u["first_name"], u["last_name"], u["email"],
                                                        arv.config()["Services"]["Workbench1"]["ExternalURL"],
                                                        uuid, prof)
+
+def getuserinfocsv(arv, uuid):
+    try:
+        u = arv.users().get(uuid=uuid).execute()
+    except Exception:
+        return [uuid, "deleted", "user", ""]
+    return [uuid, u["first_name"], u["last_name"], u["email"]]
+
 
 collectionNameCache = {}
-def getCollectionName(arv, uuid):
-    if uuid not in collectionNameCache:
-        u = arv.collections().get(uuid=uuid).execute()
-        collectionNameCache[uuid] = u["name"]
-    return collectionNameCache[uuid]
+def getCollectionName(arv, uuid, pdh):
+    lookupField = uuid
+    filters = [["uuid", "=", uuid]]
+    cached = uuid in collectionNameCache
+    # Look up by uuid if it is available, fall back to looking up by pdh
+    if uuid is None or len(uuid) != 27:
+        # Look up by pdh. Note that this can be misleading; the download could
+        # have happened from a collection with the same pdh but a different
+        # name. We arbitrarily pick the oldest collection with this pdh to
+        # look up the name, since the uuid for the request is not known.
+        lookupField = pdh
+        filters = [["portable_data_hash", "=", pdh]]
+        cached = pdh in collectionNameCache
+
+    if not cached:
+        u = arv.collections().list(filters=filters, order="created_at", limit=1).execute().get("items")
+        if len(u) < 1:
+            return "(deleted)"
+        collectionNameCache[lookupField] = u[0]["name"]
+    return collectionNameCache[lookupField]
 
 def getname(u):
     return "\"%s\" (%s)" % (u["name"], u["uuid"])
@@ -55,17 +121,20 @@ def main(arguments=None):
     if arguments is None:
         arguments = sys.argv[1:]
 
-    args = parse_arguments(arguments)
+    args, since, to = parse_arguments(arguments)
 
     arv = arvados.api()
 
-    since = datetime.datetime.utcnow() - datetime.timedelta(days=args.days)
+    prefix = ''
+    suffix = "\n"
+    if args.csv:
+        prefix = '# '
+        suffix = ''
+    print("%sUser activity on %s between %s and %s%s" % (prefix, arv.config()["ClusterID"],
+                                                       since.isoformat(sep=" ", timespec="minutes"),
+                                                       to.isoformat(sep=" ", timespec="minutes"), suffix))
 
-    print("User activity on %s between %s and %s\n" % (arv.config()["ClusterID"],
-                                                       (datetime.datetime.now() - datetime.timedelta(days=args.days)).isoformat(sep=" ", timespec="minutes"),
-                                                       datetime.datetime.now().isoformat(sep=" ", timespec="minutes")))
-
-    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()]])
+    events = arvados.util.keyset_list_all(arv.logs().list, filters=[["created_at", ">=", since.isoformat()],["created_at", "<", to.isoformat()]])
 
     users = {}
     owners = {}
@@ -74,99 +143,112 @@ def main(arguments=None):
         owner = getowner(arv, e["object_owner_uuid"], owners)
         users.setdefault(owner, [])
         event_at = ciso8601.parse_datetime(e["event_at"]).astimezone().isoformat(sep=" ", timespec="minutes")
-        # loguuid = e["uuid"]
-        loguuid = ""
+        loguuid = e["uuid"]
 
         if e["event_type"] == "create" and e["object_uuid"][6:11] == "tpzed":
             users.setdefault(e["object_uuid"], [])
-            users[e["object_uuid"]].append("%s User account created" % event_at)
+            users[e["object_uuid"]].append([loguuid, event_at, "User account created"])
 
         elif e["event_type"] == "update" and e["object_uuid"][6:11] == "tpzed":
             pass
 
         elif e["event_type"] == "create" and e["object_uuid"][6:11] == "xvhdp":
             if e["properties"]["new_attributes"]["requesting_container_uuid"] is None:
-                users[owner].append("%s Ran container %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+                users[owner].append([loguuid, event_at, "Ran container %s" % (getname(e["properties"]["new_attributes"]))])
 
         elif e["event_type"] == "update" and e["object_uuid"][6:11] == "xvhdp":
             pass
 
         elif e["event_type"] == "create" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Created project %s" %  (event_at, getname(e["properties"]["new_attributes"])))
+            users[owner].append([loguuid, event_at,"Created project %s" % (getname(e["properties"]["new_attributes"]))])
 
         elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Deleted project %s" % (event_at, getname(e["properties"]["old_attributes"])))
+            users[owner].append([loguuid, event_at,"Deleted project %s" % (getname(e["properties"]["old_attributes"]))])
 
         elif e["event_type"] == "update" and e["object_uuid"][6:11] == "j7d0g":
-            users[owner].append("%s Updated project %s" % (event_at, getname(e["properties"]["new_attributes"])))
+            users[owner].append([loguuid, event_at,"Updated project %s" % (getname(e["properties"]["new_attributes"]))])
 
         elif e["event_type"] in ("create", "update") and e["object_uuid"][6:11] == "gj3su":
             since_last = None
-            if len(users[owner]) > 0 and users[owner][-1].endswith("activity"):
-                sp = users[owner][-1].split(" ")
-                start = sp[0]+" "+sp[1]
-                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[3]+" "+sp[4])
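+            # If the previous entry for this user is an "Account activity"
+            # span, note when it started and when it last saw activity so
+            # consecutive token events can be merged into a single row.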
+            if len(users[owner]) > 0 and users[owner][-1][-1].endswith("activity"):
+                sp = users[owner][-1][-1].split(" ")
+                start = users[owner][-1][1]
+                since_last = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(sp[1]+" "+sp[2])
                 span = ciso8601.parse_datetime(event_at) - ciso8601.parse_datetime(start)
 
             if since_last is not None and since_last < datetime.timedelta(minutes=61):
-                users[owner][-1] = "%s to %s (%02d:%02d) Account activity" % (start, event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))
+                users[owner][-1] = [loguuid, start,"to %s (%02d:%02d) Account activity" % (event_at, span.days*24 + int(span.seconds/3600), int((span.seconds % 3600)/60))]
             else:
-                users[owner].append("%s to %s (0:00) Account activity" % (event_at, event_at))
+                users[owner].append([loguuid, event_at,"to %s (0:00) Account activity" % (event_at)])
 
         elif e["event_type"] == "create" and e["object_uuid"][6:11] == "o0j2j":
             if e["properties"]["new_attributes"]["link_class"] == "tag":
-                users[owner].append("%s Tagged %s" % (event_at, e["properties"]["new_attributes"]["head_uuid"]))
+                users[owner].append([event_at,"Tagged %s" % (e["properties"]["new_attributes"]["head_uuid"])])
             elif e["properties"]["new_attributes"]["link_class"] == "permission":
-                users[owner].append("%s Shared %s with %s" % (event_at, e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"]))
+                users[owner].append([loguuid, event_at,"Shared %s with %s" % (e["properties"]["new_attributes"]["tail_uuid"], e["properties"]["new_attributes"]["head_uuid"])])
             else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append([loguuid, event_at,"%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])
 
         elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "o0j2j":
             if e["properties"]["old_attributes"]["link_class"] == "tag":
-                users[owner].append("%s Untagged %s" % (event_at, e["properties"]["old_attributes"]["head_uuid"]))
+                users[owner].append([loguuid, event_at,"Untagged %s" % (e["properties"]["old_attributes"]["head_uuid"])])
             elif e["properties"]["old_attributes"]["link_class"] == "permission":
-                users[owner].append("%s Unshared %s with %s" % (event_at, e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"]))
+                users[owner].append([loguuid, event_at,"Unshared %s with %s" % (e["properties"]["old_attributes"]["tail_uuid"], e["properties"]["old_attributes"]["head_uuid"])])
             else:
-                users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+                users[owner].append([loguuid, event_at,"%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])
 
         elif e["event_type"] == "create" and e["object_uuid"][6:11] == "4zz18":
             if e["properties"]["new_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                 pass
             else:
-                users[owner].append("%s Created collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+                users[owner].append([loguuid, event_at,"Created collection %s" % (getname(e["properties"]["new_attributes"]))])
 
         elif e["event_type"] == "update" and e["object_uuid"][6:11] == "4zz18":
-            users[owner].append("%s Updated collection %s %s" % (event_at, getname(e["properties"]["new_attributes"]), loguuid))
+            users[owner].append([loguuid, event_at,"Updated collection %s" % (getname(e["properties"]["new_attributes"]))])
 
         elif e["event_type"] == "delete" and e["object_uuid"][6:11] == "4zz18":
             if e["properties"]["old_attributes"]["properties"].get("type") in ("log", "output", "intermediate"):
                 pass
             else:
-                users[owner].append("%s Deleted collection %s %s" % (event_at, getname(e["properties"]["old_attributes"]), loguuid))
+                users[owner].append([loguuid, event_at, "Deleted collection %s" % (getname(e["properties"]["old_attributes"]))])
 
         elif e["event_type"] == "file_download":
-                users[e["object_uuid"]].append("%s Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (event_at,
+                users.setdefault(e["object_uuid"], [])
+                users[e["object_uuid"]].append([loguuid, event_at, "Downloaded file \"%s\" from \"%s\" (%s) (%s)" % (
                                                                                        e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid")),
+                                                                                       getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
                                                                                        e["properties"].get("collection_uuid"),
-                                                                                       e["properties"].get("portable_data_hash")))
+                                                                                       e["properties"].get("portable_data_hash"))])
+
 
         elif e["event_type"] == "file_upload":
-                users[e["object_uuid"]].append("%s Uploaded file \"%s\" to \"%s\" (%s)" % (event_at,
+                users.setdefault(e["object_uuid"], [])
+                users[e["object_uuid"]].append([loguuid, event_at, "Uploaded file \"%s\" to \"%s\" (%s)" % (
                                                                                     e["properties"].get("collection_file_path") or e["properties"].get("reqPath"),
-                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid")),
-                                                                                    e["properties"].get("collection_uuid")))
+                                                                                    getCollectionName(arv, e["properties"].get("collection_uuid"), e["properties"].get("portable_data_hash")),
+                                                                                    e["properties"].get("collection_uuid"))])
 
         else:
-            users[owner].append("%s %s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"], loguuid))
+            users[owner].append([loguuid, event_at, "%s %s %s" % (e["event_type"], e["object_kind"], e["object_uuid"])])
+
+    if args.csv:
+        csvwriter = csv.writer(sys.stdout, dialect='unix')
 
     for k,v in users.items():
         if k is None or k.endswith("-tpzed-000000000000000"):
             continue
-        print(getuserinfo(arv, k))
-        for ev in v:
-            print("  %s" % ev)
-        print("")
+        if not args.csv:
+            print(getuserinfo(arv, k))
+            for ev in v:
+                # Remove the log entry uuid; this report is intended for human consumption
+                ev.pop(0)
+                print("  %s" % ' '.join(ev))
+            print("")
+        else:
+            user = getuserinfocsv(arv, k)
+            for ev in v:
+                csvwriter.writerow(user + ev)
 
 if __name__ == "__main__":
     main()
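
A usage sketch for the updated report (ARVADOS_API_HOST and ARVADOS_API_TOKEN
must be set; invoking the module directly is one option, packaged entry-point
names may differ):

    # Human-readable report covering the last 30 days:
    python3 -m arvados_user_activity.main --days 30
    # CSV report for a fixed UTC window:
    python3 -m arvados_user_activity.main --start 2022-03-01 --end 2022-04-01 --csv > activity.csv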