Merge branch '17150-system-root-token'
author Javier Bértoli <jbertoli@curii.com>
Wed, 25 Nov 2020 18:06:50 +0000 (15:06 -0300)
committer Javier Bértoli <jbertoli@curii.com>
Wed, 25 Nov 2020 18:06:50 +0000 (15:06 -0300)
closes #17146, #17147, #17150

Arvados-DCO-1.1-Signed-off-by: Javier Bértoli <jbertoli@curii.com>

33 files changed:
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/controllers/users_controller.rb
apps/workbench/app/views/users/profile.html.erb
build/build-dev-docker-jobs-image.sh
build/run-build-docker-jobs-image.sh
build/run-build-packages-python-and-ruby.sh
build/run-library.sh
cmd/arvados-client/cmd.go
doc/admin/config.html.textile.liquid
doc/admin/federation.html.textile.liquid
doc/admin/upgrading.html.textile.liquid
doc/api/keep-s3.html.textile.liquid
doc/api/methods.html.textile.liquid
doc/api/methods/jobs.html.textile.liquid
doc/api/methods/pipeline_templates.html.textile.liquid
doc/install/install-keep-web.html.textile.liquid
doc/user/tutorials/wgs-tutorial.html.textile.liquid
lib/controller/federation/conn.go
lib/controller/integration_test.go
lib/controller/rpc/conn_test.go
lib/costanalyzer/cmd.go [new file with mode: 0644]
lib/costanalyzer/costanalyzer.go [new file with mode: 0644]
lib/costanalyzer/costanalyzer_test.go [new file with mode: 0644]
sdk/cwl/test_with_arvbox.sh
sdk/go/arvadostest/fixtures.go
services/api/app/models/api_client_authorization.rb
services/api/test/fixtures/collections.yml
services/api/test/fixtures/container_requests.yml
services/api/test/fixtures/containers.yml
services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
services/keep-web/s3.go
services/keep-web/s3_test.go
services/keep-web/server_test.go

index cf4bfa8c5400b56543e6cfe7174091b489095b74..6d139cd5fdb207ad872ec700225f9ae7b75b9047 100644 (file)
@@ -760,7 +760,7 @@ class ApplicationController < ActionController::Base
     if current_user && !profile_config.empty?
       current_user_profile = current_user.prefs[:profile]
       profile_config.each do |k, entry|
-        if entry['Required']
+        if entry[:Required]
           if !current_user_profile ||
              !current_user_profile[k] ||
              current_user_profile[k].empty?
index 27fc12bf4c9fc7d3239131f96e93d114588bad31..21ea7a8e693e00ccd5c4599275b44fc33b1e9cdb 100644 (file)
@@ -39,6 +39,18 @@ class UsersController < ApplicationController
 
   def profile
     params[:offer_return_to] ||= params[:return_to]
+
+    # In a federation situation, when you get a user record using
+    # "current user of token" it can fetch a stale user record from
+    # the local cluster. So even if profile settings were just written
+    # to the user record on the login cluster (because the user just
+    # filled out the profile), those profile settings may not appear
+    # in the "current user" response because it is returning a cached
+    # record from the local cluster.
+    #
+    # In this case, explicitly fetching user record forces it to get a
+    # fresh record from the login cluster.
+    Thread.current[:user] = User.find(current_user.uuid)
   end
 
   def activity
index 6692196dabf717e40defd77e9c6c0c2538d3c393..caa22bda11cd0925fb5a9a98636860ad8827c61d 100644 (file)
@@ -68,29 +68,30 @@ SPDX-License-Identifier: AGPL-3.0 %>
               </div>
 
               <% profile_config.kind_of?(Array) && profile_config.andand.each do |entry| %>
-                <% if entry['Key'] %>
+                <% if entry[:Key] %>
                   <%
                       show_save_button = true
-                      label = entry['Required'] ? '* ' : ''
-                      label += entry['FormFieldTitle']
-                      value = current_user_profile[entry['Key'].to_sym] if current_user_profile
+                      label = entry[:Required] ? '* ' : ''
+                      label += entry[:FormFieldTitle]
+                      value = current_user_profile[entry[:Key].to_sym] if current_user_profile
                   %>
                   <div class="form-group">
-                    <label for="<%=entry['Key']%>"
+                    <label for="<%=entry[:Key]%>"
                            class="col-sm-3 control-label"
-                           style=<%="color:red" if entry['Required']&&(!value||value.empty?)%>> <%=label%>
+                           style=<%="color:red" if entry[:Required]&&(!value||value.empty?)%>> <%=label%>
                     </label>
-                    <% if entry['Type'] == 'select' %>
+                    <% if entry[:Type] == 'select' %>
                       <div class="col-sm-8">
-                        <select class="form-control" name="user[prefs][profile][<%=entry['Key']%>]">
-                          <% entry['Options'].each do |option, _| %>
+                        <select class="form-control" name="user[prefs][profile][<%=entry[:Key]%>]">
+                          <% entry[:Options].each do |option, _| %>
+                           <% option = option.to_s %>
                             <option value="<%=option%>" <%='selected' if option==value%>><%=option%></option>
                           <% end %>
                         </select>
                       </div>
                     <% else %>
                       <div class="col-sm-8">
-                        <input type="text" class="form-control" name="user[prefs][profile][<%=entry['Key']%>]" placeholder="<%=entry['FormFieldDescription']%>" value="<%=value%>" ></input>
+                        <input type="text" class="form-control" name="user[prefs][profile][<%=entry[:Key]%>]" placeholder="<%=entry[:FormFieldDescription]%>" value="<%=value%>" ></input>
                       </div>
                     <% end %>
                   </div>
index 0e570d5f31838037160f5797f80e1fc0cc7048e4..af838d68e8c7e33ac5f7d1d0f10e52fa7b95b47f 100755 (executable)
@@ -69,10 +69,10 @@ fi
 
 . build/run-library.sh
 
+# This defines python_sdk_version and cwl_runner_version with python-style
+# package suffixes (.dev/rc)
 calculate_python_sdk_cwl_package_versions
 
-cwl_runner_version=$(echo -n $cwl_runner_version | sed s/~dev/.dev/g | sed s/~rc/rc/g)
-
 set -x
 docker build --no-cache --build-arg sdk=$sdk --build-arg runner=$runner --build-arg salad=$salad --build-arg cwltool=$cwltool --build-arg pythoncmd=$py --build-arg pipcmd=$pipcmd -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$cwl_runner_version "$WORKSPACE/sdk"
 echo arv-keepdocker arvados/jobs $cwl_runner_version
index 59914a2ee9dcdeb78a7de4eb9d59c7716342ff05..07577182166ed2a35a8a16eceabee47ffb1b7aa5 100755 (executable)
@@ -139,41 +139,47 @@ if [[ -z "$ARVADOS_BUILDING_VERSION" ]] && ! [[ -z "$version_tag" ]]; then
        ARVADOS_BUILDING_ITERATION="1"
 fi
 
+# This defines python_sdk_version and cwl_runner_version with python-style
+# package suffixes (.dev/rc)
 calculate_python_sdk_cwl_package_versions
 
-echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
-
-if [[ "${python_sdk_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
-       python_sdk_version="${python_sdk_version}-1"
-else
-       python_sdk_version="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
+if [[ -z "$cwl_runner_version" ]]; then
+  echo "ERROR: cwl_runner_version is empty";
+  exit 1
 fi
 
-# What we use to tag the Docker image.  For development and release
-# candidate packages, the OS package has a "~dev" or "~rc" suffix, but
-# Python requires a ".dev" or "rc" suffix.  Arvados-cwl-runner will be
-# expecting the Python-compatible version string when it tries to pull
-# the Docker image, but --build-arg is expecting the OS package
+echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
+
+# For development and release candidate packages, the OS package has a "~dev"
+# or "~rc" suffix, but Python requires a ".dev" or "rc" suffix.
+#
+# Arvados-cwl-runner will be expecting the Python-compatible version string
+# when it tries to pull the Docker image, so we use that to tag the Docker
+# image.
+#
+# The --build-arg docker invocation arguments are expecting the OS package
 # version.
-cwl_runner_version_tag=$(echo -n $cwl_runner_version | sed s/~dev/.dev/g | sed s/~rc/rc/g)
+python_sdk_version_os=$(echo -n $python_sdk_version | sed s/.dev/~dev/g | sed s/rc/~rc/g)
+cwl_runner_version_os=$(echo -n $cwl_runner_version | sed s/.dev/~dev/g | sed s/rc/~rc/g)
 
-if [[ -z "$cwl_runner_version_tag" ]]; then
-  echo "ERROR: cwl_runner_version_tag is empty";
-  exit 1
+if [[ "${python_sdk_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
+       python_sdk_version_os="${python_sdk_version_os}-1"
+else
+       python_sdk_version_os="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
 fi
 
-if [[ "${cwl_runner_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
-       cwl_runner_version="${cwl_runner_version}-1"
+if [[ "${cwl_runner_version_os}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
+       cwl_runner_version_os="${cwl_runner_version_os}-1"
 else
-       cwl_runner_version="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
+       cwl_runner_version_os="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
 fi
 
 cd docker/jobs
 docker build $NOCACHE \
-       --build-arg python_sdk_version=${python_sdk_version} \
-       --build-arg cwl_runner_version=${cwl_runner_version} \
+       --build-arg python_sdk_version=${python_sdk_version_os} \
+       --build-arg cwl_runner_version=${cwl_runner_version_os} \
        --build-arg repo_version=${REPO} \
-       -t arvados/jobs:$cwl_runner_version_tag .
+       -t arvados/jobs:$cwl_runner_version .
 
 ECODE=$?
 
@@ -207,7 +213,7 @@ else
         ## 20150526 nico -- *sometimes* dockerhub needs re-login
         ## even though credentials are already in .dockercfg
         docker login -u arvados
-        docker_push arvados/jobs:$cwl_runner_version_tag
+        docker_push arvados/jobs:$cwl_runner_version
         title "upload arvados images finished (`timer`)"
     else
         title "upload arvados images SKIPPED because no --upload option set (`timer`)"
index f3b7564d714f41492c8ff55933707a98c99086fb..f255307607c492468f567e7f649c6dcd0818b919 100755 (executable)
@@ -6,7 +6,6 @@
 COLUMNS=80
 
 . `dirname "$(readlink -f "$0")"`/run-library.sh
-#. `dirname "$(readlink -f "$0")"`/libcloud-pin.sh
 
 read -rd "\000" helpmessage <<EOF
 $(basename $0): Build Arvados Python packages and Ruby gems
@@ -50,6 +49,16 @@ gem_wrapper() {
   title "End of $gem_name gem build (`timer`)"
 }
 
+handle_python_package () {
+  # This function assumes the current working directory is the python package directory
+  if [ -n "$(find dist -name "*-$(nohash_version_from_git).tar.gz" -print -quit)" ]; then
+    echo "This package doesn't need rebuilding."
+    return
+  fi
+  # Make sure only to use sdist - that's the only format pip can deal with (sigh)
+  python3 setup.py $DASHQ_UNLESS_DEBUG sdist
+}
+
 python_wrapper() {
   local package_name="$1"; shift
   local package_directory="$1"; shift
index 1716cf3706240323ef96486398634cd6084d449a..6f95a8f4bfd8cb9736a5b9fba6c8076005ce2de3 100755 (executable)
@@ -79,16 +79,6 @@ calculate_python_sdk_cwl_package_versions() {
   cwl_runner_version=$(cd sdk/cwl && python3 arvados_version.py)
 }
 
-handle_python_package () {
-  # This function assumes the current working directory is the python package directory
-  if [ -n "$(find dist -name "*-$(nohash_version_from_git).tar.gz" -print -quit)" ]; then
-    # This package doesn't need rebuilding.
-    return
-  fi
-  # Make sure only to use sdist - that's the only format pip can deal with (sigh)
-  python setup.py $DASHQ_UNLESS_DEBUG sdist
-}
-
 handle_ruby_gem() {
     local gem_name="$1"; shift
     local gem_version="$(nohash_version_from_git)"
@@ -690,9 +680,9 @@ fpm_build_virtualenv () {
     done
   fi
 
-  # the python-arvados-cwl-runner package comes with cwltool, expose that version
-  if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/python2.7/dist/python-arvados-cwl-runner/bin/cwltool" ]]; then
-    COMMAND_ARR+=("usr/share/python2.7/dist/python-arvados-cwl-runner/bin/cwltool=/usr/bin/")
+  # the python3-arvados-cwl-runner package comes with cwltool, expose that version
+  if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/$python/dist/python-arvados-cwl-runner/bin/cwltool" ]]; then
+    COMMAND_ARR+=("usr/share/$python/dist/python-arvados-cwl-runner/bin/cwltool=/usr/bin/")
   fi
 
   COMMAND_ARR+=(".")
index bcc3dda09ac91559d4a35227ef81c95bf3e979cd..47fcd5ad7dc88275c5c9ce47369f3432ac861632 100644 (file)
@@ -9,6 +9,7 @@ import (
 
        "git.arvados.org/arvados.git/lib/cli"
        "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/lib/costanalyzer"
        "git.arvados.org/arvados.git/lib/deduplicationreport"
        "git.arvados.org/arvados.git/lib/mount"
 )
@@ -55,6 +56,7 @@ var (
 
                "mount":                mount.Command,
                "deduplication-report": deduplicationreport.Command,
+               "costanalyzer":         costanalyzer.Command,
        })
 )
 
index 316b6f48b7f567d8e92aefe3a9926ff1110b680c..745cd2853265a096ad99b55bd6c53124feb22871 100644 (file)
@@ -10,7 +10,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-The master Arvados configuration is stored at @/etc/arvados/config.yml@
+The Arvados configuration is stored at @/etc/arvados/config.yml@
 
 See "Migrating Configuration":config-migration.html for information about migrating from legacy component-specific configuration files.
 
index eb4a451a891e4151b016f5d9d7edf6ae0d191c90..d6ffb48f4143d9e7bfec42d80da24aaa9bc8c343 100644 (file)
@@ -57,9 +57,9 @@ Clusters:
       LoginCluster: clsr1
 </pre>
 
-The @LoginCluster@ configuration redirects all user logins to the LoginCluster, and the LoginCluster will issue API tokens which will be accepted by the federation.  Users are activated or deactivated across the entire federation based on their status on the master cluster.
+The @LoginCluster@ configuration redirects all user logins to the LoginCluster, and the LoginCluster will issue API tokens which will be accepted by the federation.  Users are activated or deactivated across the entire federation based on their status on the login cluster.
 
-Note: tokens issued by the master cluster need to be periodically re-validated when used on other clusters in the federation.  The period between revalidation attempts is configured with @Login.RemoteTokenRefresh@.  The default is 5 minutes.  A longer period reduces overhead from validating tokens, but means it may take longer for other clusters to notice when a token has been revoked or a user has changed status (being activated/deactivated, admin flag changed).
+Note: tokens issued by the login cluster need to be periodically re-validated when used on other clusters in the federation.  The period between revalidation attempts is configured with @Login.RemoteTokenRefresh@.  The default is 5 minutes.  A longer period reduces overhead from validating tokens, but means it may take longer for other clusters to notice when a token has been revoked or a user has changed status (being activated/deactivated, admin flag changed).
 
 To migrate users of existing clusters with separate user databases to use a single LoginCluster, use "arv-federation-migrate":merge-remote-account.html .
 
index e8cde5acec8fe41761e016f963a1ac8356588708..3f622112e95391d5364be1e16f211729b2c4a150 100644 (file)
@@ -35,7 +35,7 @@ TODO: extract this information based on git commit messages and generate changel
 <div class="releasenotes">
 </notextile>
 
-h2(#master). development master (as of 2020-10-28)
+h2(#main). development main (as of 2020-10-28)
 
 "Upgrading from 2.1.0":#v2_1_0
 
index 2cae817613699a4ba08467742c736e4827fa058e..bee91516bc12fc61e87a51b603361372ad64e358 100644 (file)
@@ -21,7 +21,11 @@ To access Arvados S3 using an S3 client library, you must tell it to use the URL
 
 The "bucket name" is an Arvados collection uuid, portable data hash, or project uuid.
 
-The bucket name must be encoded as the first path segment of every request.  This is what the S3 documentation calls "Path-Style Requests".
+Path-style and virtual host-style requests are supported.
+* A path-style request uses the hostname indicated by @Services.WebDAVDownload.ExternalURL@, with the bucket name in the first path segment: @https://download.example.com/zzzzz-4zz18-asdfgasdfgasdfg/@.
+* A virtual host-style request uses the hostname pattern indicated by @Services.WebDAV.ExternalURL@, with a bucket name in place of the leading @*@: @https://zzzzz-4zz18-asdfgasdfgasdfg.collections.example.com/@.
+
+If you have wildcard DNS, TLS, and routing set up, an S3 client configured with endpoint @collections.example.com@ should work regardless of which request style it uses.
 
 h3. Supported Operations
 
@@ -70,5 +74,16 @@ h3. Authorization mechanisms
 
 Keep-web accepts AWS Signature Version 4 (AWS4-HMAC-SHA256) as well as the older V2 AWS signature.
 
-* If your client uses V4 signatures exclusively: use the Arvados token's UUID part as AccessKey, and its secret part as SecretKey.  This is preferred.
-* If your client uses V2 signatures, or a combination of V2 and V4, or the Arvados token UUID is unknown: use the secret part of the Arvados token for both AccessKey and SecretKey.
+If your client uses V4 signatures exclusively _and_ your Arvados token was issued by the same cluster you are connecting to, you can use the Arvados token's UUID part as your S3 Access Key, and its secret part as your S3 Secret Key. This is preferred, where applicable.
+
+Example using cluster @zzzzz@:
+* Arvados token: @v2/zzzzz-gj3su-yyyyyyyyyyyyyyy/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@
+* Access Key: @zzzzz-gj3su-yyyyyyyyyyyyyyy@
+* Secret Key: @xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@
+
+In all other cases, replace every @/@ character in your Arvados token with @_@, and use the resulting string as both Access Key and Secret Key.
+
+Example using a cluster other than @zzzzz@ _or_ an S3 client that uses V2 signatures:
+* Arvados token: @v2/zzzzz-gj3su-yyyyyyyyyyyyyyy/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@
+* Access Key: @v2_zzzzz-gj3su-yyyyyyyyyyyyyyy_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@
+* Secret Key: @v2_zzzzz-gj3su-yyyyyyyyyyyyyyy_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx@
index 872a1bca7149acb22f891d243a1be316d4d7a9c8..ae96d0a3b5872dd17f1d5e91bec856ad7845c6b3 100644 (file)
@@ -103,7 +103,7 @@ table(table table-bordered table-condensed).
 |@=@, @!=@|string, number, timestamp, or null|Equality comparison|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@ @["tail_uuid","!=",null]@|
 |@<@, @<=@, @>=@, @>@|string, number, or timestamp|Ordering comparison|@["script_version",">","123"]@|
 |@like@, @ilike@|string|SQL pattern match.  Single character match is @_@ and wildcard is @%@. The @ilike@ operator is case-insensitive|@["script_version","like","d00220fb%"]@|
-|@in@, @not in@|array of strings|Set membership|@["script_version","in",["master","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@|
+|@in@, @not in@|array of strings|Set membership|@["script_version","in",["main","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@|
 |@is_a@|string|Arvados object type|@["head_uuid","is_a","arvados#collection"]@|
 |@exists@|string|Test if a subproperty is present.|@["properties","exists","my_subproperty"]@|
 
index 13fa8387679c533184f0686d31681731a7752eb2..aa7a58898a58dcb998f0de202db907a97843e5bf 100644 (file)
@@ -57,7 +57,7 @@ See "Specifying Git versions":#script_version below for more detail about accept
 
 h3(#script_version). Specifying Git versions
 
-The script_version attribute and arvados_sdk_version runtime constraint are typically given as a branch, tag, or commit hash, but there are many more ways to specify a Git commit. The "specifying revisions" section of the "gitrevisions manual page":http://git-scm.com/docs/gitrevisions.html has a definitive list. Arvados accepts Git versions in any format listed there that names a single commit (not a tree, a blob, or a range of commits). However, some kinds of names can be expected to resolve differently in Arvados than they do in your local repository. For example, <code>HEAD@{1}</code> refers to the local reflog, and @origin/master@ typically refers to a remote branch: neither is likely to work as desired if given as a Git version.
+The script_version attribute and arvados_sdk_version runtime constraint are typically given as a branch, tag, or commit hash, but there are many more ways to specify a Git commit. The "specifying revisions" section of the "gitrevisions manual page":http://git-scm.com/docs/gitrevisions.html has a definitive list. Arvados accepts Git versions in any format listed there that names a single commit (not a tree, a blob, or a range of commits). However, some kinds of names can be expected to resolve differently in Arvados than they do in your local repository. For example, <code>HEAD@{1}</code> refers to the local reflog, and @origin/main@ typically refers to a remote branch: neither is likely to work as desired if given as a Git version.
 
 h3. Runtime constraints
 
@@ -138,14 +138,14 @@ notextile. <div class="spaced-out">
 
 h4. Examples
 
-Run the script "crunch_scripts/hash.py" in the repository "you" using the "master" commit.  Arvados should re-use a previous job if the script_version of the previous job is the same as the current "master" commit. This works irrespective of whether the previous job was submitted using the name "master", a different branch name or tag indicating the same commit, a SHA-1 commit hash, etc.
+Run the script "crunch_scripts/hash.py" in the repository "you" using the "main" commit.  Arvados should re-use a previous job if the script_version of the previous job is the same as the current "main" commit. This works irrespective of whether the previous job was submitted using the name "main", a different branch name or tag indicating the same commit, a SHA-1 commit hash, etc.
 
 <notextile><pre>
 {
   "job": {
     "script": "hash.py",
     "repository": "<b>you</b>/<b>you</b>",
-    "script_version": "master",
+    "script_version": "main",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
     }
@@ -170,14 +170,14 @@ Run using exactly the version "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5". Arvado
 }
 </pre></notextile>
 
-Arvados should re-use a previous job if the "script_version" of the previous job is between "earlier_version_tag" and the "master" commit (inclusive), but not the commit indicated by "blacklisted_version_tag". If there are no previous jobs matching these criteria, run the job using the "master" commit.
+Arvados should re-use a previous job if the "script_version" of the previous job is between "earlier_version_tag" and the "main" commit (inclusive), but not the commit indicated by "blacklisted_version_tag". If there are no previous jobs matching these criteria, run the job using the "main" commit.
 
 <notextile><pre>
 {
   "job": {
     "script": "hash.py",
     "repository": "<b>you</b>/<b>you</b>",
-    "script_version": "master",
+    "script_version": "main",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
     }
@@ -195,7 +195,7 @@ The same behavior, using filters:
   "job": {
     "script": "hash.py",
     "repository": "<b>you</b>/<b>you</b>",
-    "script_version": "master",
+    "script_version": "main",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
     }
@@ -208,14 +208,14 @@ The same behavior, using filters:
 }
 </pre></notextile>
 
-Run the script "crunch_scripts/monte-carlo.py" in the repository "you/you" using the current "master" commit. Because it is marked as "nondeterministic", this job will not be considered as a suitable candidate for future job submissions that use the "find_or_create" feature.
+Run the script "crunch_scripts/monte-carlo.py" in the repository "you/you" using the current "main" commit. Because it is marked as "nondeterministic", this job will not be considered as a suitable candidate for future job submissions that use the "find_or_create" feature.
 
 <notextile><pre>
 {
   "job": {
     "script": "monte-carlo.py",
     "repository": "<b>you</b>/<b>you</b>",
-    "script_version": "master",
+    "script_version": "main",
     "nondeterministic": true,
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
index 40297aa05199b77ac317b8afc94843961b03702d..141072c51c451770830a9d22bd0fdd4185a826d9 100644 (file)
@@ -77,7 +77,7 @@ This is a pipeline named "Filter MD5 hash values" with two components, "do_hash"
     "do_hash": {
       "script": "hash.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": {
         "input": {
           "required": true,
@@ -90,7 +90,7 @@ This is a pipeline named "Filter MD5 hash values" with two components, "do_hash"
     "filter": {
       "script": "0-filter.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": {
         "input": {
           "output_of": "do_hash"
@@ -110,13 +110,13 @@ This pipeline consists of three components.  The components "thing1" and "thing2
     "cat_in_the_hat": {
       "script": "cat.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": { }
     },
     "thing1": {
       "script": "thing1.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": {
         "input": {
           "output_of": "cat_in_the_hat"
@@ -126,7 +126,7 @@ This pipeline consists of three components.  The components "thing1" and "thing2
     "thing2": {
       "script": "thing2.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": {
         "input": {
           "output_of": "cat_in_the_hat"
@@ -146,19 +146,19 @@ This pipeline consists of three components.  The component "cleanup" depends on
     "thing1": {
       "script": "thing1.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": { }
     },
     "thing2": {
       "script": "thing2.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": { }
     },
     "cleanup": {
       "script": "cleanup.py",
       "repository": "<b>you</b>/<b>you</b>",
-      "script_version": "master",
+      "script_version": "main",
       "script_parameters": {
         "mess1": {
           "output_of": "thing1"
index 24f37bfb4f8ee25b3b32b691624e06586f9b42d1..b797c1958e4102cf4551000ed1d691d887e1e682 100644 (file)
@@ -20,7 +20,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 h2(#introduction). Introduction
 
-The Keep-web server provides read/write HTTP (WebDAV) access to files stored in Keep.  This makes it easy to access files in Keep from a browser, or mount Keep as a network folder using WebDAV support in various operating systems. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides TLS support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
+The Keep-web server provides read/write access to files stored in Keep using WebDAV and S3 protocols.  This makes it easy to access files in Keep from a browser, or mount Keep as a network folder using WebDAV support in various operating systems. It serves public data to unauthenticated clients, and serves private data to clients that supply Arvados API tokens. It can be installed anywhere with access to Keep services, typically behind a web proxy that provides TLS support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for more detail.
 
 h2(#dns). Configure DNS
 
@@ -61,6 +61,8 @@ Collections can be served from their own subdomain:
 </code></pre>
 </notextile>
 
+This option is preferred if you plan to access Keep using third-party S3 client software, because it accommodates S3 virtual host-style requests and path-style requests without any special client configuration.
+
 h4. Under the main domain
 
 Alternately, they can go under the main domain by including @--@:
index cd4d1cc715e0f8dda3fdbe362ad096173c516f4e..a68d7ca21eeecfd58089d9dbaab1d481fd1d1b6f 100644 (file)
@@ -245,9 +245,9 @@ node.json gives a high level overview about the instance such as name, price, an
 ** Contains information about resource consumption (RAM, cpu, disk, network) on the node while it was running
 This is different from the log crunchstat.txt because it includes resource consumption of Arvados components that run on the node outside the container such as crunch-run and other processes related to the Keep file system.
 
-For the highest level logs, the logs are tracking the container that ran the @arvados-cwl-runner@ process which you can think of as the “mastermind” behind tracking which parts of the CWL workflow need to be run when, which have been run already, what order they need to be run, which can be run simultaneously, and so forth and then sending out the related container requests.  Each step then has their own logs related to containers running a CWL step of the workflow including a log of standard error that contains the standard error of the code run in that CWL step.  Those logs can be found by expanding the steps and clicking on the link to the log collection.
+The highest-level logs track the container that ran the @arvados-cwl-runner@ process, which you can think of as the “workflow runner”. It tracks which parts of the CWL workflow need to be run and when, which have already been run, in what order they need to be run, which can be run simultaneously, and so forth, and then creates the necessary container requests.  Each step has its own logs for the containers that ran that CWL step, including a standard error log containing the stderr output of the code run in that step.  Those logs can be found by expanding the steps and clicking on the link to the log collection.
 
-Let’s take a peek at a few of these logs to get you more familiar with them.  First, we can look at the @stderr.txt@ of the highest level process.  Again recall this should be of the “mastermind” @arvados-cwl-runner@ process.  You can click on the log to download it to your local machine, and when you look at the contents - you should see something like the following...
+Let’s take a peek at a few of these logs to get you more familiar with them.  First, we can look at the @stderr.txt@ of the highest level process.  Again recall this should be of the “workflow runner” @arvados-cwl-runner@ process.  You can click on the log to download it to your local machine, and when you look at the contents - you should see something like the following...
 
 <pre><code>2020-06-22T20:30:04.737703197Z INFO /usr/bin/arvados-cwl-runner 2.0.3, arvados-python-client 2.0.3, cwltool 1.0.20190831161204
 2020-06-22T20:30:04.743250012Z INFO Resolved '/var/lib/cwl/workflow.json#main' to 'file:///var/lib/cwl/workflow.json#main'
index 986faa7b05e33e325a6bc1c15b4283ec6d79d9ed..130368124cdd904a40ceb3938122181594c26804 100644 (file)
@@ -462,7 +462,18 @@ func (conn *Conn) UserUpdate(ctx context.Context, options arvados.UpdateOptions)
        if options.BypassFederation {
                return conn.local.UserUpdate(ctx, options)
        }
-       return conn.chooseBackend(options.UUID).UserUpdate(ctx, options)
+       resp, err := conn.chooseBackend(options.UUID).UserUpdate(ctx, options)
+       if err != nil {
+               return resp, err
+       }
+       if !strings.HasPrefix(options.UUID, conn.cluster.ClusterID) {
+               // Copy the updated user record to the local cluster
+               err = conn.batchUpdateUsers(ctx, arvados.ListOptions{}, []arvados.User{resp})
+               if err != nil {
+                       return arvados.User{}, err
+               }
+       }
+       return resp, err
 }
 
 func (conn *Conn) UserUpdateUUID(ctx context.Context, options arvados.UpdateUUIDOptions) (arvados.User, error) {
index 3da01ca6823562a6b13509adf58b9e621f704dec..6ac8c2e338205ad0fbcf0ae30e019b49b2705116 100644 (file)
@@ -8,6 +8,7 @@ import (
        "bytes"
        "context"
        "encoding/json"
+       "fmt"
        "io"
        "io/ioutil"
        "math"
@@ -15,7 +16,10 @@ import (
        "net/http"
        "net/url"
        "os"
+       "os/exec"
        "path/filepath"
+       "strconv"
+       "strings"
 
        "git.arvados.org/arvados.git/lib/boot"
        "git.arvados.org/arvados.git/lib/config"
@@ -280,6 +284,80 @@ func (s *IntegrationSuite) TestGetCollectionByPDH(c *check.C) {
        c.Check(coll.PortableDataHash, check.Equals, pdh)
 }
 
+// TestS3WithFederatedToken checks that a token obtained for a z1111
+// user can be used as S3 credentials (through the s3cmd command line
+// tool) against the WebDAV host advertised in z3333's configuration,
+// for collections created on either cluster. The test is skipped
+// when s3cmd is not found in PATH.
+func (s *IntegrationSuite) TestS3WithFederatedToken(c *check.C) {
+       if _, err := exec.LookPath("s3cmd"); err != nil {
+               c.Skip("s3cmd not in PATH")
+               return
+       }
+
+       // Content of the single file stored in each test collection;
+       // its length is what the s3cmd output is matched against.
+       testText := "IntegrationSuite.TestS3WithFederatedToken"
+
+       conn1 := s.conn("z1111")
+       rootctx1, _, _ := s.rootClients("z1111")
+       userctx1, ac1, _, _ := s.userClients(rootctx1, c, conn1, "z1111", true)
+       conn3 := s.conn("z3333")
+
+       // createColl builds a collection holding test.txt (containing
+       // testText) and creates it on the given cluster, using clients
+       // authenticated with ac1's token.
+       createColl := func(clusterID string) arvados.Collection {
+               _, ac, kc := s.clientsWithToken(clusterID, ac1.AuthToken)
+               var coll arvados.Collection
+               fs, err := coll.FileSystem(ac, kc)
+               c.Assert(err, check.IsNil)
+               f, err := fs.OpenFile("test.txt", os.O_CREATE|os.O_RDWR, 0777)
+               c.Assert(err, check.IsNil)
+               _, err = io.WriteString(f, testText)
+               c.Assert(err, check.IsNil)
+               err = f.Close()
+               c.Assert(err, check.IsNil)
+               mtxt, err := fs.MarshalManifest(".")
+               c.Assert(err, check.IsNil)
+               coll, err = s.conn(clusterID).CollectionCreate(userctx1, arvados.CreateOptions{Attrs: map[string]interface{}{
+                       "manifest_text": mtxt,
+               }})
+               c.Assert(err, check.IsNil)
+               return coll
+       }
+
+       // Each trial uses one of two credential forms: only the third
+       // "/"-separated field of the token, or the whole token with
+       // "/" replaced by "_".
+       for _, trial := range []struct {
+               clusterID string // create the collection on this cluster (then use z3333 to access it)
+               token     string
+       }{
+               // Try the hardest test first: z3333 hasn't seen
+               // z1111's token yet, and we're just passing the
+               // opaque secret part, so z3333 has to guess that it
+               // belongs to z1111.
+               {"z1111", strings.Split(ac1.AuthToken, "/")[2]},
+               {"z3333", strings.Split(ac1.AuthToken, "/")[2]},
+               {"z1111", strings.Replace(ac1.AuthToken, "/", "_", -1)},
+               {"z3333", strings.Replace(ac1.AuthToken, "/", "_", -1)},
+       } {
+               c.Logf("================ %v", trial)
+               coll := createColl(trial.clusterID)
+
+               // Fetch z3333's exported config to learn its WebDAV host,
+               // which is used as the S3 endpoint below.
+               cfgjson, err := conn3.ConfigGet(userctx1)
+               c.Assert(err, check.IsNil)
+               var cluster arvados.Cluster
+               err = json.Unmarshal(cfgjson, &cluster)
+               c.Assert(err, check.IsNil)
+
+               c.Logf("TokenV2 is %s", ac1.AuthToken)
+               host := cluster.Services.WebDAV.ExternalURL.Host
+               // The same token string is passed as both access key and
+               // secret key.
+               s3args := []string{
+                       "--ssl", "--no-check-certificate",
+                       "--host=" + host, "--host-bucket=" + host,
+                       "--access_key=" + trial.token, "--secret_key=" + trial.token,
+               }
+               // "ls" must list test.txt with the expected size.
+               buf, err := exec.Command("s3cmd", append(s3args, "ls", "s3://"+coll.UUID)...).CombinedOutput()
+               c.Check(err, check.IsNil)
+               c.Check(string(buf), check.Matches, `.* `+fmt.Sprintf("%d", len(testText))+` +s3://`+coll.UUID+`/test.txt\n`)
+
+               // "get" is only checked through its progress output
+               // ("<len> of <len>"); its exit status is ignored, see below.
+               buf, err = exec.Command("s3cmd", append(s3args, "get", "s3://"+coll.UUID+"/test.txt", c.MkDir()+"/tmpfile")...).CombinedOutput()
+               // Command fails because we don't return Etag header.
+               // c.Check(err, check.IsNil)
+               flen := strconv.Itoa(len(testText))
+               c.Check(string(buf), check.Matches, `(?ms).*`+flen+` of `+flen+`.*`)
+       }
+}
+
 func (s *IntegrationSuite) TestGetCollectionAsAnonymous(c *check.C) {
        conn1 := s.conn("z1111")
        conn3 := s.conn("z3333")
index f43cc1ddee295d506854fc97447c0cfe46d868ab..cf4dbc47673e7713e8f9d77c2ebbb449077e4447 100644 (file)
@@ -24,7 +24,11 @@ func Test(t *testing.T) {
 
 var _ = check.Suite(&RPCSuite{})
 
-const contextKeyTestTokens = "testTokens"
+// key is a distinct integer type for the constants declared below.
+// NOTE(review): presumably used as a context.Context value key, where
+// a private type avoids collisions with other packages' keys --
+// confirm against the usages in this test file.
+type key int
+
+const (
+       // contextKeyTestTokens is the first (zero) value of type key.
+       contextKeyTestTokens key = iota
+)
 
 type RPCSuite struct {
        log  logrus.FieldLogger
diff --git a/lib/costanalyzer/cmd.go b/lib/costanalyzer/cmd.go
new file mode 100644 (file)
index 0000000..9b06852
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package costanalyzer
+
+import (
+       "io"
+
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/sirupsen/logrus"
+)
+
+// Command is the exported instance of the costanalyzer subcommand.
+var Command command
+
+// command is an empty type that carries the RunCommand method.
+type command struct{}
+
+// NoPrefixFormatter is a logrus formatter that emits only the log
+// message itself.
+type NoPrefixFormatter struct{}
+
+// Format returns the entry's message as bytes, unchanged: no level,
+// timestamp, fields or trailing newline are added.
+func (f *NoPrefixFormatter) Format(entry *logrus.Entry) ([]byte, error) {
+       return []byte(entry.Message), nil
+}
+
+// RunCommand implements the "costanalyzer" subcommand.
+//
+// It creates a logger that writes message-only text to stderr,
+// delegates the actual work to costanalyzer() and returns its exit
+// code. A non-nil error returned by costanalyzer() is logged just
+// before RunCommand returns.
+func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       // err is read by the deferred closure below, so the assignment
+       // from costanalyzer() must target this same variable.
+       var err error
+       logger := ctxlog.New(stderr, "text", "info")
+       defer func() {
+               if err != nil {
+                       logger.Error("\n" + err.Error() + "\n")
+               }
+       }()
+
+       logger.SetFormatter(new(NoPrefixFormatter))
+
+       loader := config.NewLoader(stdin, logger)
+       loader.SkipLegacy = true
+
+       // exitcode is new here; err is the variable declared above.
+       exitcode, err := costanalyzer(prog, args, loader, logger, stdout, stderr)
+
+       return exitcode
+}
diff --git a/lib/costanalyzer/costanalyzer.go b/lib/costanalyzer/costanalyzer.go
new file mode 100644 (file)
index 0000000..4284542
--- /dev/null
@@ -0,0 +1,528 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package costanalyzer
+
+import (
+       "encoding/json"
+       "errors"
+       "flag"
+       "fmt"
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
+       "io"
+       "io/ioutil"
+       "net/http"
+       "os"
+       "strconv"
+       "strings"
+       "time"
+
+       "github.com/sirupsen/logrus"
+)
+
+// nodeInfo holds the pricing fields decoded from the 'node.json' file
+// of a container's log collection (see getNode). Two layouts are
+// supported; addContainerLine uses the legacy fields when the legacy
+// price is non-zero and the modern fields otherwise.
+type nodeInfo struct {
+       // Legacy (records created by Arvados Node Manager with Arvados <= 1.4.3)
+       Properties struct {
+               CloudNode struct {
+                       Price float64
+                       Size  string
+               } `json:"cloud_node"`
+       }
+       // Modern
+       ProviderType string
+       Price        float64
+}
+
+// arrayFlags collects the values of a command line flag that may be
+// given more than once. Its String and Set methods let it be passed
+// to flag.FlagSet.Var.
+type arrayFlags []string
+
+// String always returns the empty string.
+func (i *arrayFlags) String() string {
+       return ""
+}
+
+// Set appends value to the list and never returns an error.
+func (i *arrayFlags) Set(value string) error {
+       *i = append(*i, value)
+       return nil
+}
+
+// parseFlags parses the costanalyzer command line arguments.
+//
+// A non-zero exitCode tells the caller to stop: 1 after help was
+// requested (err is nil in that case), 2 for a flag parsing error, a
+// missing -uuid or -output argument, or an invalid -log-level. On
+// success it sets the logger's level and returns the supplied uuids,
+// the output directory and whether caching is enabled. The loader
+// argument is not used by this function.
+func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitCode int, uuids arrayFlags, resultsDir string, cache bool, err error) {
+       flags := flag.NewFlagSet("", flag.ContinueOnError)
+       flags.SetOutput(stderr)
+       flags.Usage = func() {
+               fmt.Fprintf(flags.Output(), `
+Usage:
+  %s [options ...]
+
+       This program analyzes the cost of Arvados container requests. For each uuid
+       supplied, it creates a CSV report that lists all the containers used to
+       fulfill the container request, together with the machine type and cost of
+       each container.
+
+       When supplied with the uuid of a container request, it will calculate the
+       cost of that container request and all its children. When suplied with a
+       project uuid or when supplied with multiple container request uuids, it will
+       create a CSV report for each supplied uuid, as well as a CSV file with
+       aggregate cost accounting for all supplied uuids. The aggregate cost report
+       takes container reuse into account: if a container was reused between several
+       container requests, its cost will only be counted once.
+
+       To get the node costs, the progam queries the Arvados API for current cost
+       data for each node type used. This means that the reported cost always
+       reflects the cost data as currently defined in the Arvados API configuration
+       file.
+
+       Caveats:
+       - the Arvados API configuration cost data may be out of sync with the cloud
+       provider.
+       - when generating reports for older container requests, the cost data in the
+       Arvados API configuration file may have changed since the container request
+       was fulfilled. This program uses the cost data stored at the time of the
+       execution of the container, stored in the 'node.json' file in its log
+       collection.
+
+       In order to get the data for the uuids supplied, the ARVADOS_API_HOST and
+       ARVADOS_API_TOKEN environment variables must be set.
+
+Options:
+`, prog)
+               flags.PrintDefaults()
+       }
+       loglevel := flags.String("log-level", "info", "logging `level` (debug, info, ...)")
+       flags.StringVar(&resultsDir, "output", "", "output `directory` for the CSV reports (required)")
+       flags.Var(&uuids, "uuid", "Toplevel `project or container request` uuid. May be specified more than once. (required)")
+       flags.BoolVar(&cache, "cache", true, "create and use a local disk cache of Arvados objects")
+       err = flags.Parse(args)
+       if err == flag.ErrHelp {
+               // Help was requested: not an error, but the caller must
+               // still stop, hence the non-zero exit code.
+               err = nil
+               exitCode = 1
+               return
+       } else if err != nil {
+               exitCode = 2
+               return
+       }
+
+       if len(uuids) < 1 {
+               flags.Usage()
+               err = fmt.Errorf("Error: no uuid(s) provided")
+               exitCode = 2
+               return
+       }
+
+       if resultsDir == "" {
+               flags.Usage()
+               err = fmt.Errorf("Error: output directory must be specified")
+               exitCode = 2
+               return
+       }
+
+       // lvl is new; err is the named result, so a parse failure is
+       // reported to the caller by the bare return below.
+       lvl, err := logrus.ParseLevel(*loglevel)
+       if err != nil {
+               exitCode = 2
+               return
+       }
+       logger.SetLevel(lvl)
+       if !cache {
+               logger.Debug("Caching disabled\n")
+       }
+       return
+}
+
+func ensureDirectory(logger *logrus.Logger, dir string) (err error) {
+       statData, err := os.Stat(dir)
+       if os.IsNotExist(err) {
+               err = os.MkdirAll(dir, 0700)
+               if err != nil {
+                       return fmt.Errorf("error creating directory %s: %s", dir, err.Error())
+               }
+       } else {
+               if !statData.IsDir() {
+                       return fmt.Errorf("the path %s is not a directory", dir)
+               }
+       }
+       return
+}
+
+// addContainerLine builds one CSV line for the given container
+// request / container pair and returns it together with the
+// container's cost.
+//
+// The columns are: CR UUID, CR name, container UUID, state, started
+// at, finished at, duration in seconds, node type, hourly node price
+// and total cost. Timestamps that are not set produce empty columns;
+// the duration (and therefore the cost) is only computed when both
+// the start and the finish time are known, otherwise the duration
+// column is empty and the cost is zero.
+//
+// The price and node type come from the legacy node properties when
+// the legacy price is non-zero, and from the modern fields otherwise.
+// The logger argument is currently unused.
+//
+// NOTE(review): values are joined with plain commas and are not CSV
+// escaped, so a container request name containing a comma or a
+// newline will break the column layout.
+func addContainerLine(logger *logrus.Logger, node nodeInfo, cr arvados.ContainerRequest, container arvados.Container) (csv string, cost float64) {
+       csv = cr.UUID + ","
+       csv += cr.Name + ","
+       csv += container.UUID + ","
+       csv += string(container.State) + ","
+       if container.StartedAt != nil {
+               csv += container.StartedAt.String() + ","
+       } else {
+               csv += ","
+       }
+       if container.FinishedAt != nil {
+               csv += container.FinishedAt.String() + ","
+       } else {
+               csv += ","
+       }
+
+       // Only dereference StartedAt when it is set: a container can
+       // have a finish time without ever having been started.
+       var delta time.Duration
+       if container.StartedAt != nil && container.FinishedAt != nil {
+               delta = container.FinishedAt.Sub(*container.StartedAt)
+               csv += strconv.FormatFloat(delta.Seconds(), 'f', 0, 64) + ","
+       } else {
+               csv += ","
+       }
+
+       var price float64
+       var size string
+       if node.Properties.CloudNode.Price != 0 {
+               price = node.Properties.CloudNode.Price
+               size = node.Properties.CloudNode.Size
+       } else {
+               price = node.Price
+               size = node.ProviderType
+       }
+       // price is per hour, delta is converted from seconds to hours.
+       cost = delta.Seconds() / 3600 * price
+       csv += size + "," + strconv.FormatFloat(price, 'f', 8, 64) + "," + strconv.FormatFloat(cost, 'f', 8, 64) + "\n"
+       return
+}
+
+func loadCachedObject(logger *logrus.Logger, file string, uuid string, object interface{}) (reload bool) {
+       reload = true
+       if strings.Contains(uuid, "-j7d0g-") {
+               // We do not cache projects, they have no final state
+               return
+       }
+       // See if we have a cached copy of this object
+       _, err := os.Stat(file)
+       if err != nil {
+               return
+       }
+       data, err := ioutil.ReadFile(file)
+       if err != nil {
+               logger.Errorf("error reading %q: %s", file, err)
+               return
+       }
+       err = json.Unmarshal(data, &object)
+       if err != nil {
+               logger.Errorf("failed to unmarshal json: %s: %s", data, err)
+               return
+       }
+
+       // See if it is in a final state, if that makes sense
+       switch v := object.(type) {
+       case *arvados.ContainerRequest:
+               if v.State == arvados.ContainerRequestStateFinal {
+                       reload = false
+                       logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
+               }
+       case *arvados.Container:
+               if v.State == arvados.ContainerStateComplete || v.State == arvados.ContainerStateCancelled {
+                       reload = false
+                       logger.Debugf("Loaded object %s from local cache (%s)\n", uuid, file)
+               }
+       }
+       return
+}
+
+// loadObject loads the Arvados object identified by uuid into object.
+//
+// When cache is true, a copy stored under
+// ~/.cache/arvados/costanalyzer/<uuid>.json is used if
+// loadCachedObject accepts it; if the home directory or the cache
+// directory is unavailable, caching is skipped for this call.
+// Otherwise the object is fetched from the API: groups for
+// "-j7d0g-" uuids, container requests for "-xvhdp-" and containers
+// for "-dz642-". Any other uuid type is an error. A freshly fetched
+// object is written back to the cache directory when one is in use.
+//
+// The path argument is currently unused.
+func loadObject(logger *logrus.Logger, ac *arvados.Client, path string, uuid string, cache bool, object interface{}) (err error) {
+       file := uuid + ".json"
+
+       var reload bool
+       var cacheDir string
+
+       if !cache {
+               reload = true
+       } else {
+               // homeErr/dirErr are local on purpose: cache setup
+               // problems only disable caching, they are not returned.
+               homeDir, homeErr := os.UserHomeDir()
+               if homeErr != nil {
+                       reload = true
+                       logger.Info("Unable to determine current user home directory, not using cache")
+               } else {
+                       cacheDir = homeDir + "/.cache/arvados/costanalyzer/"
+                       dirErr := ensureDirectory(logger, cacheDir)
+                       if dirErr != nil {
+                               reload = true
+                               logger.Infof("Unable to create cache directory at %s, not using cache: %s", cacheDir, dirErr.Error())
+                       } else {
+                               reload = loadCachedObject(logger, cacheDir+file, uuid, object)
+                       }
+               }
+       }
+       if !reload {
+               return
+       }
+
+       if strings.Contains(uuid, "-j7d0g-") {
+               err = ac.RequestAndDecode(&object, "GET", "arvados/v1/groups/"+uuid, nil, nil)
+       } else if strings.Contains(uuid, "-xvhdp-") {
+               err = ac.RequestAndDecode(&object, "GET", "arvados/v1/container_requests/"+uuid, nil, nil)
+       } else if strings.Contains(uuid, "-dz642-") {
+               err = ac.RequestAndDecode(&object, "GET", "arvados/v1/containers/"+uuid, nil, nil)
+       } else {
+               // There is no underlying error to report here, so the
+               // message only names the offending uuid.
+               err = fmt.Errorf("unsupported object type with UUID %q", uuid)
+               return
+       }
+       if err != nil {
+               err = fmt.Errorf("error loading object with UUID %q:\n  %s", uuid, err)
+               return
+       }
+       encoded, err := json.MarshalIndent(object, "", " ")
+       if err != nil {
+               err = fmt.Errorf("error marshaling object with UUID %q:\n  %s", uuid, err)
+               return
+       }
+       // cacheDir is left empty when caching is disabled or when the
+       // home directory could not be determined.
+       if cacheDir != "" {
+               err = ioutil.WriteFile(cacheDir+file, encoded, 0644)
+               if err != nil {
+                       err = fmt.Errorf("error writing file %s:\n  %s", file, err)
+                       return
+               }
+       }
+       return
+}
+
+// getNode reads the 'node.json' file from the log collection of the
+// given container request and decodes it into a nodeInfo.
+//
+// It returns an error when the container request has no log
+// collection, when the collection cannot be fetched or opened, or
+// when 'node.json' is missing or cannot be decoded. The arv argument
+// is currently unused.
+func getNode(arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, cr arvados.ContainerRequest) (node nodeInfo, err error) {
+       if cr.LogUUID == "" {
+               err = errors.New("No log collection")
+               return
+       }
+
+       var collection arvados.Collection
+       err = ac.RequestAndDecode(&collection, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
+       if err != nil {
+               err = fmt.Errorf("error getting collection: %s", err)
+               return
+       }
+
+       var fs arvados.CollectionFileSystem
+       fs, err = collection.FileSystem(ac, kc)
+       if err != nil {
+               err = fmt.Errorf("error opening collection as filesystem: %s", err)
+               return
+       }
+       var f http.File
+       f, err = fs.Open("node.json")
+       if err != nil {
+               err = fmt.Errorf("error opening file 'node.json' in collection %s: %s", cr.LogUUID, err)
+               return
+       }
+       // This function is called once per container request, so the
+       // file handle must be released on every return path.
+       defer f.Close()
+
+       err = json.NewDecoder(f).Decode(&node)
+       if err != nil {
+               err = fmt.Errorf("error reading file 'node.json' in collection %s: %s", cr.LogUUID, err)
+               return
+       }
+       return
+}
+
+// handleProject generates a CSV report for every top level container
+// request owned by the project with the given uuid, and returns the
+// combined cost of all containers involved, keyed by container UUID.
+//
+// "Top level" means container requests whose
+// requesting_container_uuid is not set. At most 10000 container
+// requests are requested from the API.
+func handleProject(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
+       cost = make(map[string]float64)
+
+       var project arvados.Group
+       if err = loadObject(logger, ac, uuid, uuid, cache, &project); err != nil {
+               return nil, fmt.Errorf("error loading object %s: %s", uuid, err.Error())
+       }
+
+       // Container requests owned by the project that were not
+       // themselves requested by another container.
+       topLevelFilters := []arvados.Filter{
+               {
+                       Attr:     "owner_uuid",
+                       Operator: "=",
+                       Operand:  project.UUID,
+               },
+               {
+                       Attr:     "requesting_container_uuid",
+                       Operator: "=",
+                       Operand:  nil,
+               },
+       }
+       var listing map[string]interface{}
+       err = ac.RequestAndDecode(&listing, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
+               "filters": topLevelFilters,
+               "limit":   10000,
+       })
+       if err != nil {
+               return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
+       }
+
+       rawItems, found := listing["items"]
+       if !found {
+               logger.Infof("No top level container requests found in project %s\n", uuid)
+               return
+       }
+
+       logger.Infof("Collecting top level container requests in project %s\n", uuid)
+       for _, rawItem := range rawItems.([]interface{}) {
+               crUUID := rawItem.(map[string]interface{})["uuid"].(string)
+               crCost, crErr := generateCrCsv(logger, crUUID, arv, ac, kc, resultsDir, cache)
+               if crErr != nil {
+                       return nil, fmt.Errorf("error generating container_request CSV: %s", crErr.Error())
+               }
+               // Merge by container UUID so that a container shared by
+               // several requests appears only once.
+               for containerUUID, amount := range crCost {
+                       cost[containerUUID] = amount
+               }
+       }
+       return
+}
+
+// generateCrCsv writes the CSV cost report for a single container
+// request to <resultsDir>/<uuid>.csv.
+//
+// The report has one line for the container of the request itself,
+// one line for the container of every container request that names
+// that container as its requesting_container_uuid (at most 10000 are
+// requested from the API), and a final TOTAL line. Only these direct
+// children are queried here.
+//
+// The returned map holds the cost of each container in the report,
+// keyed by container UUID.
+func generateCrCsv(logger *logrus.Logger, uuid string, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, resultsDir string, cache bool) (cost map[string]float64, err error) {
+
+       cost = make(map[string]float64)
+
+       csv := "CR UUID,CR name,Container UUID,State,Started At,Finished At,Duration in seconds,Compute node type,Hourly node cost,Total cost\n"
+       var tmpCsv string
+       var tmpTotalCost float64
+       var totalCost float64
+
+       // This is a container request, find the container
+       var cr arvados.ContainerRequest
+       err = loadObject(logger, ac, uuid, uuid, cache, &cr)
+       if err != nil {
+               return nil, fmt.Errorf("error loading cr object %s: %s", uuid, err)
+       }
+       var container arvados.Container
+       err = loadObject(logger, ac, uuid, cr.ContainerUUID, cache, &container)
+       if err != nil {
+               return nil, fmt.Errorf("error loading container object %s: %s", cr.ContainerUUID, err)
+       }
+
+       topNode, err := getNode(arv, ac, kc, cr)
+       if err != nil {
+               return nil, fmt.Errorf("error getting node %s: %s", cr.UUID, err)
+       }
+       tmpCsv, totalCost = addContainerLine(logger, topNode, cr, container)
+       csv += tmpCsv
+       // NOTE(review): tmpTotalCost has not been assigned yet at this
+       // point, so this statement adds zero.
+       totalCost += tmpTotalCost
+       cost[container.UUID] = totalCost
+
+       // Find all container requests that have the container we found above as requesting_container_uuid
+       var childCrs arvados.ContainerRequestList
+       filterset := []arvados.Filter{
+               {
+                       Attr:     "requesting_container_uuid",
+                       Operator: "=",
+                       Operand:  container.UUID,
+               }}
+       err = ac.RequestAndDecode(&childCrs, "GET", "arvados/v1/container_requests", nil, map[string]interface{}{
+               "filters": filterset,
+               "limit":   10000,
+       })
+       if err != nil {
+               return nil, fmt.Errorf("error querying container_requests: %s", err.Error())
+       }
+       logger.Infof("Collecting child containers for container request %s", uuid)
+       for _, cr2 := range childCrs.Items {
+               // One dot per child container request, as a progress
+               // indicator.
+               logger.Info(".")
+               node, err := getNode(arv, ac, kc, cr2)
+               if err != nil {
+                       return nil, fmt.Errorf("error getting node %s: %s", cr2.UUID, err)
+               }
+               logger.Debug("\nChild container: " + cr2.ContainerUUID + "\n")
+               var c2 arvados.Container
+               err = loadObject(logger, ac, uuid, cr2.ContainerUUID, cache, &c2)
+               if err != nil {
+                       return nil, fmt.Errorf("error loading object %s: %s", cr2.ContainerUUID, err)
+               }
+               tmpCsv, tmpTotalCost = addContainerLine(logger, node, cr2, c2)
+               cost[cr2.ContainerUUID] = tmpTotalCost
+               csv += tmpCsv
+               totalCost += tmpTotalCost
+       }
+       logger.Info(" done\n")
+
+       // The nine empty columns line the total up with the
+       // "Total cost" column of the header.
+       csv += "TOTAL,,,,,,,,," + strconv.FormatFloat(totalCost, 'f', 8, 64) + "\n"
+
+       // Write the resulting CSV file
+       fName := resultsDir + "/" + uuid + ".csv"
+       err = ioutil.WriteFile(fName, []byte(csv), 0644)
+       if err != nil {
+               return nil, fmt.Errorf("error writing file with path %s: %s", fName, err.Error())
+       }
+       logger.Infof("\nUUID report in %s\n\n", fName)
+
+       return
+}
+
+// costanalyzer parses the command line, writes one CSV report per
+// supplied project or container request uuid into the output
+// directory, and then writes an aggregate CSV covering every
+// container seen.
+//
+// Costs are collected in a map keyed by container UUID, so a
+// container that was reused by several container requests is counted
+// only once in the aggregate report. User uuids ("Home" projects) are
+// reported as unsupported and skipped.
+//
+// A non-zero exitcode is returned on failure; err, when set,
+// describes what went wrong. The stdout argument is currently unused.
+func costanalyzer(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int, err error) {
+       exitcode, uuids, resultsDir, cache, err := parseFlags(prog, args, loader, logger, stderr)
+       if exitcode != 0 {
+               return
+       }
+       err = ensureDirectory(logger, resultsDir)
+       if err != nil {
+               exitcode = 3
+               return
+       }
+
+       // Arvados Client setup
+       arv, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               err = fmt.Errorf("error creating Arvados object: %s", err)
+               exitcode = 1
+               return
+       }
+       kc, err := keepclient.MakeKeepClient(arv)
+       if err != nil {
+               err = fmt.Errorf("error creating Keep object: %s", err)
+               exitcode = 1
+               return
+       }
+
+       ac := arvados.NewClientFromEnv()
+
+       cost := make(map[string]float64)
+       for _, uuid := range uuids {
+               if strings.Contains(uuid, "-j7d0g-") {
+                       // This is a project (group)
+                       //
+                       // Keep the project's costs in their own map:
+                       // assigning them directly to cost would discard
+                       // everything accumulated for uuids processed
+                       // earlier.
+                       var projectCost map[string]float64
+                       projectCost, err = handleProject(logger, uuid, arv, ac, kc, resultsDir, cache)
+                       if err != nil {
+                               exitcode = 1
+                               return
+                       }
+                       for k, v := range projectCost {
+                               cost[k] = v
+                       }
+               } else if strings.Contains(uuid, "-xvhdp-") {
+                       // This is a container request
+                       var crCsv map[string]float64
+                       crCsv, err = generateCrCsv(logger, uuid, arv, ac, kc, resultsDir, cache)
+                       if err != nil {
+                               err = fmt.Errorf("Error generating container_request CSV for uuid %s: %s", uuid, err.Error())
+                               exitcode = 2
+                               return
+                       }
+                       for k, v := range crCsv {
+                               cost[k] = v
+                       }
+               } else if strings.Contains(uuid, "-tpzed-") {
+                       // This is a user. The "Home" project for a user is not a real project.
+                       // It is identified by the user uuid. As such, cost analysis for the
+                       // "Home" project is not supported by this program. Skip this uuid, but
+                       // keep going.
+                       logger.Errorf("Cost analysis is not supported for the 'Home' project: %s", uuid)
+               }
+       }
+
+       if len(cost) == 0 {
+               logger.Info("Nothing to do!\n")
+               return
+       }
+
+       var csv string
+
+       csv = "# Aggregate cost accounting for uuids:\n"
+       for _, uuid := range uuids {
+               csv += "# " + uuid + "\n"
+       }
+
+       var total float64
+       for k, v := range cost {
+               csv += k + "," + strconv.FormatFloat(v, 'f', 8, 64) + "\n"
+               total += v
+       }
+
+       csv += "TOTAL," + strconv.FormatFloat(total, 'f', 8, 64) + "\n"
+
+       // Write the resulting CSV file
+       aFile := resultsDir + "/" + time.Now().Format("2006-01-02-15-04-05") + "-aggregate-costaccounting.csv"
+       err = ioutil.WriteFile(aFile, []byte(csv), 0644)
+       if err != nil {
+               err = fmt.Errorf("Error writing file with path %s: %s", aFile, err.Error())
+               exitcode = 1
+               return
+       }
+       logger.Infof("Aggregate cost accounting for all supplied uuids in %s\n", aFile)
+       return
+}
diff --git a/lib/costanalyzer/costanalyzer_test.go b/lib/costanalyzer/costanalyzer_test.go
new file mode 100644 (file)
index 0000000..4fab93b
--- /dev/null
@@ -0,0 +1,266 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package costanalyzer
+
+import (
+       "bytes"
+       "io"
+       "io/ioutil"
+       "os"
+       "regexp"
+       "testing"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
+       "gopkg.in/check.v1"
+)
+
+// Test is the "go test" entry point; it hands control to gocheck, which
+// runs the suites registered with check.Suite.
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
+var _ = check.Suite(&Suite{})
+
+type Suite struct{}
+
+// TearDownSuite resets the shared test environment after the suite has run.
+func (s *Suite) TearDownSuite(c *check.C) {
+       // Undo any changes/additions to the database so they don't affect subsequent tests.
+       arvadostest.ResetEnv()
+}
+
+func (s *Suite) SetUpSuite(c *check.C) {
+       arvadostest.StartAPI()
+       arvadostest.StartKeep(2, true)
+
+       // Get the various arvados, arvadosclient, and keep client objects
+       ac := arvados.NewClientFromEnv()
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.Equals, nil)
+       arv.ApiToken = arvadostest.ActiveToken
+       kc, err := keepclient.MakeKeepClient(arv)
+       c.Assert(err, check.Equals, nil)
+
+       standardE4sV3JSON := `{
+    "Name": "Standard_E4s_v3",
+    "ProviderType": "Standard_E4s_v3",
+    "VCPUs": 4,
+    "RAM": 34359738368,
+    "Scratch": 64000000000,
+    "IncludedScratch": 64000000000,
+    "AddedScratch": 0,
+    "Price": 0.292,
+    "Preemptible": false
+}`
+       standardD32sV3JSON := `{
+    "Name": "Standard_D32s_v3",
+    "ProviderType": "Standard_D32s_v3",
+    "VCPUs": 32,
+    "RAM": 137438953472,
+    "Scratch": 256000000000,
+    "IncludedScratch": 256000000000,
+    "AddedScratch": 0,
+    "Price": 1.76,
+    "Preemptible": false
+}`
+
+       standardA1V2JSON := `{
+    "Name": "a1v2",
+    "ProviderType": "Standard_A1_v2",
+    "VCPUs": 1,
+    "RAM": 2147483648,
+    "Scratch": 10000000000,
+    "IncludedScratch": 10000000000,
+    "AddedScratch": 0,
+    "Price": 0.043,
+    "Preemptible": false
+}`
+
+       standardA2V2JSON := `{
+    "Name": "a2v2",
+    "ProviderType": "Standard_A2_v2",
+    "VCPUs": 2,
+    "RAM": 4294967296,
+    "Scratch": 20000000000,
+    "IncludedScratch": 20000000000,
+    "AddedScratch": 0,
+    "Price": 0.091,
+    "Preemptible": false
+}`
+
+       legacyD1V2JSON := `{
+    "properties": {
+        "cloud_node": {
+            "price": 0.073001,
+            "size": "Standard_D1_v2"
+        },
+        "total_cpu_cores": 1,
+        "total_ram_mb": 3418,
+        "total_scratch_mb": 51170
+    }
+}`
+
+       // Our fixtures do not actually contain file contents. Populate the log collections we're going to use with the node.json file
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedContainerRequestUUID, arvadostest.LogCollectionUUID, standardE4sV3JSON)
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedContainerRequestUUID2, arvadostest.LogCollectionUUID2, standardD32sV3JSON)
+
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedDiagnosticsContainerRequest1UUID, arvadostest.DiagnosticsContainerRequest1LogCollectionUUID, standardA1V2JSON)
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedDiagnosticsContainerRequest2UUID, arvadostest.DiagnosticsContainerRequest2LogCollectionUUID, standardA1V2JSON)
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedDiagnosticsHasher1ContainerRequestUUID, arvadostest.Hasher1LogCollectionUUID, standardA1V2JSON)
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedDiagnosticsHasher2ContainerRequestUUID, arvadostest.Hasher2LogCollectionUUID, standardA2V2JSON)
+       createNodeJSON(c, arv, ac, kc, arvadostest.CompletedDiagnosticsHasher3ContainerRequestUUID, arvadostest.Hasher3LogCollectionUUID, legacyD1V2JSON)
+}
+
+// createNodeJSON writes nodeJSON as "node.json" into the log collection of
+// the container request crUUID and saves the updated manifest through the
+// API. It first asserts that the request's log collection is logUUID, so a
+// fixture mismatch is reported at setup time. Any failure aborts the calling
+// test via c.Assert.
+//
+// The arv parameter is not used by the body; it is kept so existing callers
+// keep compiling.
+func createNodeJSON(c *check.C, arv *arvadosclient.ArvadosClient, ac *arvados.Client, kc *keepclient.KeepClient, crUUID string, logUUID string, nodeJSON string) {
+       // Get the CR
+       var cr arvados.ContainerRequest
+       err := ac.RequestAndDecode(&cr, "GET", "arvados/v1/container_requests/"+crUUID, nil, nil)
+       c.Assert(err, check.IsNil)
+       c.Assert(cr.LogUUID, check.Equals, logUUID)
+
+       // Get the log collection
+       var coll arvados.Collection
+       err = ac.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+cr.LogUUID, nil, nil)
+       c.Assert(err, check.IsNil)
+
+       // Create a node.json file -- the fixture doesn't actually contain the contents of the collection.
+       fs, err := coll.FileSystem(ac, kc)
+       c.Assert(err, check.IsNil)
+       f, err := fs.OpenFile("node.json", os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0777)
+       c.Assert(err, check.IsNil)
+       _, err = io.WriteString(f, nodeJSON)
+       c.Assert(err, check.IsNil)
+       err = f.Close()
+       c.Assert(err, check.IsNil)
+
+       // Flush the data to Keep
+       mtxt, err := fs.MarshalManifest(".")
+       c.Assert(err, check.IsNil)
+       c.Assert(mtxt, check.NotNil)
+
+       // Update collection record
+       err = ac.RequestAndDecode(&coll, "PUT", "arvados/v1/collections/"+cr.LogUUID, nil, map[string]interface{}{
+               "collection": map[string]interface{}{
+                       "manifest_text": mtxt,
+               },
+       })
+       c.Assert(err, check.IsNil)
+}
+
+// TestUsage checks that asking for help exits with status 1, prints nothing
+// on stdout and prints the usage text on stderr.
+func (*Suite) TestUsage(c *check.C) {
+       var outBuf, errBuf bytes.Buffer
+       args := []string{"-help", "-log-level=debug"}
+       status := Command.RunCommand("costanalyzer.test", args, &bytes.Buffer{}, &outBuf, &errBuf)
+       c.Check(status, check.Equals, 1)
+       c.Check(outBuf.String(), check.Equals, "")
+       c.Check(errBuf.String(), check.Matches, `(?ms).*Usage:.*`)
+}
+
+// TestContainerRequestUUID runs the cost analyzer on a single container
+// request and checks the TOTAL line of both the per-request CSV and the
+// aggregate CSV whose path is reported on stderr.
+func (*Suite) TestContainerRequestUUID(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run costanalyzer with 1 container request uuid
+       exitcode := Command.RunCommand("costanalyzer.test", []string{"-uuid", arvadostest.CompletedContainerRequestUUID, "-output", "results"}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Assert(stderr.String(), check.Matches, "(?ms).*supplied uuids in .*")
+
+       uuidReport, err := ioutil.ReadFile("results/" + arvadostest.CompletedContainerRequestUUID + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport), check.Matches, "(?ms).*TOTAL,,,,,,,,,7.01302889")
+       re := regexp.MustCompile(`(?ms).*supplied uuids in (.*?)\n`)
+       matches := re.FindStringSubmatch(stderr.String()) // matches[1] contains a string like 'results/2020-11-02-18-57-45-aggregate-costaccounting.csv'
+       // FindStringSubmatch returns nil when nothing matches; fail the test
+       // cleanly here instead of panicking on matches[1] below.
+       c.Assert(matches, check.HasLen, 2)
+
+       aggregateCostReport, err := ioutil.ReadFile(matches[1])
+       c.Assert(err, check.IsNil)
+
+       c.Check(string(aggregateCostReport), check.Matches, "(?ms).*TOTAL,7.01302889")
+}
+
+// TestDoubleContainerRequestUUID runs the cost analyzer on two container
+// requests given individually, then moves both into a project and runs it
+// again on the project uuid. Both runs must produce the same per-request
+// totals and the same aggregate total.
+func (*Suite) TestDoubleContainerRequestUUID(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run costanalyzer with 2 container request uuids
+       exitcode := Command.RunCommand("costanalyzer.test", []string{"-uuid", arvadostest.CompletedContainerRequestUUID, "-uuid", arvadostest.CompletedContainerRequestUUID2, "-output", "results"}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Assert(stderr.String(), check.Matches, "(?ms).*supplied uuids in .*")
+
+       uuidReport, err := ioutil.ReadFile("results/" + arvadostest.CompletedContainerRequestUUID + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport), check.Matches, "(?ms).*TOTAL,,,,,,,,,7.01302889")
+
+       uuidReport2, err := ioutil.ReadFile("results/" + arvadostest.CompletedContainerRequestUUID2 + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport2), check.Matches, "(?ms).*TOTAL,,,,,,,,,42.27031111")
+
+       // The same pattern is used for both runs, so compile it once.
+       re := regexp.MustCompile(`(?ms).*supplied uuids in (.*?)\n`)
+       matches := re.FindStringSubmatch(stderr.String()) // matches[1] contains a string like 'results/2020-11-02-18-57-45-aggregate-costaccounting.csv'
+       // FindStringSubmatch returns nil when nothing matches; fail the test
+       // cleanly here instead of panicking on matches[1] below.
+       c.Assert(matches, check.HasLen, 2)
+
+       aggregateCostReport, err := ioutil.ReadFile(matches[1])
+       c.Assert(err, check.IsNil)
+
+       c.Check(string(aggregateCostReport), check.Matches, "(?ms).*TOTAL,49.28334000")
+       // Discard the first run's output so the second run is checked on its own.
+       stdout.Reset()
+       stderr.Reset()
+
+       // Now move both container requests into an existing project, and then re-run
+       // the analysis with the project uuid. The results should be identical.
+       ac := arvados.NewClientFromEnv()
+       var cr arvados.ContainerRequest
+       err = ac.RequestAndDecode(&cr, "PUT", "arvados/v1/container_requests/"+arvadostest.CompletedContainerRequestUUID, nil, map[string]interface{}{
+               "container_request": map[string]interface{}{
+                       "owner_uuid": arvadostest.AProjectUUID,
+               },
+       })
+       c.Assert(err, check.IsNil)
+       err = ac.RequestAndDecode(&cr, "PUT", "arvados/v1/container_requests/"+arvadostest.CompletedContainerRequestUUID2, nil, map[string]interface{}{
+               "container_request": map[string]interface{}{
+                       "owner_uuid": arvadostest.AProjectUUID,
+               },
+       })
+       c.Assert(err, check.IsNil)
+
+       // Run costanalyzer with the project uuid
+       exitcode = Command.RunCommand("costanalyzer.test", []string{"-uuid", arvadostest.AProjectUUID, "-cache=false", "-log-level", "debug", "-output", "results"}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Assert(stderr.String(), check.Matches, "(?ms).*supplied uuids in .*")
+
+       uuidReport, err = ioutil.ReadFile("results/" + arvadostest.CompletedContainerRequestUUID + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport), check.Matches, "(?ms).*TOTAL,,,,,,,,,7.01302889")
+
+       uuidReport2, err = ioutil.ReadFile("results/" + arvadostest.CompletedContainerRequestUUID2 + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport2), check.Matches, "(?ms).*TOTAL,,,,,,,,,42.27031111")
+
+       matches = re.FindStringSubmatch(stderr.String()) // matches[1] contains a string like 'results/2020-11-02-18-57-45-aggregate-costaccounting.csv'
+       c.Assert(matches, check.HasLen, 2)
+
+       aggregateCostReport, err = ioutil.ReadFile(matches[1])
+       c.Assert(err, check.IsNil)
+
+       c.Check(string(aggregateCostReport), check.Matches, "(?ms).*TOTAL,49.28334000")
+}
+
+// TestMultipleContainerRequestUUIDWithReuse runs the cost analyzer on the two
+// "diagnostics" container requests and checks each per-request total as well
+// as the aggregate total. In the fixtures the child requests of both point at
+// the same hasher containers, and the expected aggregate is smaller than the
+// sum of the two per-request totals.
+func (*Suite) TestMultipleContainerRequestUUIDWithReuse(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run costanalyzer with 2 container request uuids
+       exitcode := Command.RunCommand("costanalyzer.test", []string{"-uuid", arvadostest.CompletedDiagnosticsContainerRequest1UUID, "-uuid", arvadostest.CompletedDiagnosticsContainerRequest2UUID, "-output", "results"}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Assert(stderr.String(), check.Matches, "(?ms).*supplied uuids in .*")
+
+       uuidReport, err := ioutil.ReadFile("results/" + arvadostest.CompletedDiagnosticsContainerRequest1UUID + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport), check.Matches, "(?ms).*TOTAL,,,,,,,,,0.00916192")
+
+       uuidReport2, err := ioutil.ReadFile("results/" + arvadostest.CompletedDiagnosticsContainerRequest2UUID + ".csv")
+       c.Assert(err, check.IsNil)
+       c.Check(string(uuidReport2), check.Matches, "(?ms).*TOTAL,,,,,,,,,0.00588088")
+
+       re := regexp.MustCompile(`(?ms).*supplied uuids in (.*?)\n`)
+       matches := re.FindStringSubmatch(stderr.String()) // matches[1] contains a string like 'results/2020-11-02-18-57-45-aggregate-costaccounting.csv'
+       // FindStringSubmatch returns nil when nothing matches; fail the test
+       // cleanly here instead of panicking on matches[1] below.
+       c.Assert(matches, check.HasLen, 2)
+
+       aggregateCostReport, err := ioutil.ReadFile(matches[1])
+       c.Assert(err, check.IsNil)
+
+       c.Check(string(aggregateCostReport), check.Matches, "(?ms).*TOTAL,0.01492030")
+}
index 935bec63b6d6a0b585c0eca38a22e20a42d304a7..0021bc8d906c5531b70c79a87d9be169658b5c57 100755 (executable)
@@ -141,9 +141,11 @@ else
   . /usr/src/arvados/build/run-library.sh
   TMPHERE=\$(pwd)
   cd /usr/src/arvados
+
+  # This defines python_sdk_version and cwl_runner_version with python-style
+  # package suffixes (.dev/rc)
   calculate_python_sdk_cwl_package_versions
 
-  cwl_runner_version=\$(echo -n \$cwl_runner_version | sed s/~dev/.dev/g | sed s/~rc/rc/g)
   cd \$TMPHERE
   set -u
 
index 5677f4deca5d70f16ab44d4023434f7e94fc73e2..aeb5a47e6d0559df094ee3cbec5432d3b3b8f2ce 100644 (file)
@@ -44,7 +44,27 @@ const (
 
        RunningContainerUUID = "zzzzz-dz642-runningcontainr"
 
-       CompletedContainerUUID = "zzzzz-dz642-compltcontainer"
+       CompletedContainerUUID         = "zzzzz-dz642-compltcontainer"
+       CompletedContainerRequestUUID  = "zzzzz-xvhdp-cr4completedctr"
+       CompletedContainerRequestUUID2 = "zzzzz-xvhdp-cr4completedcr2"
+
+       CompletedDiagnosticsContainerRequest1UUID     = "zzzzz-xvhdp-diagnostics0001"
+       CompletedDiagnosticsContainerRequest2UUID     = "zzzzz-xvhdp-diagnostics0002"
+       CompletedDiagnosticsContainer1UUID            = "zzzzz-dz642-diagcompreq0001"
+       CompletedDiagnosticsContainer2UUID            = "zzzzz-dz642-diagcompreq0002"
+       DiagnosticsContainerRequest1LogCollectionUUID = "zzzzz-4zz18-diagcompreqlog1"
+       DiagnosticsContainerRequest2LogCollectionUUID = "zzzzz-4zz18-diagcompreqlog2"
+
+       CompletedDiagnosticsHasher1ContainerRequestUUID = "zzzzz-xvhdp-diag1hasher0001"
+       CompletedDiagnosticsHasher2ContainerRequestUUID = "zzzzz-xvhdp-diag1hasher0002"
+       CompletedDiagnosticsHasher3ContainerRequestUUID = "zzzzz-xvhdp-diag1hasher0003"
+       CompletedDiagnosticsHasher1ContainerUUID        = "zzzzz-dz642-diagcomphasher1"
+       CompletedDiagnosticsHasher2ContainerUUID        = "zzzzz-dz642-diagcomphasher2"
+       CompletedDiagnosticsHasher3ContainerUUID        = "zzzzz-dz642-diagcomphasher3"
+
+       Hasher1LogCollectionUUID = "zzzzz-4zz18-dlogcollhash001"
+       Hasher2LogCollectionUUID = "zzzzz-4zz18-dlogcollhash002"
+       Hasher3LogCollectionUUID = "zzzzz-4zz18-dlogcollhash003"
 
        ArvadosRepoUUID = "zzzzz-s0uqq-arvadosrepo0123"
        ArvadosRepoName = "arvados"
@@ -73,6 +93,9 @@ const (
        TestVMUUID = "zzzzz-2x53u-382brsig8rp3064"
 
        CollectionWithUniqueWordsUUID = "zzzzz-4zz18-mnt690klmb51aud"
+
+       LogCollectionUUID  = "zzzzz-4zz18-logcollection01"
+       LogCollectionUUID2 = "zzzzz-4zz18-logcollection02"
 )
 
 // PathologicalManifest : A valid manifest designed to test
index 74a4c1efa571b5229825243c98034957e99d4e45..6b308a231cb7ede8cf50b949da75a861a46219d3 100644 (file)
@@ -130,6 +130,7 @@ class ApiClientAuthorization < ArvadosModel
 
     token_uuid = ''
     secret = token
+    stored_secret = nil         # ...if different from secret
     optional = nil
 
     case token[0..2]
@@ -206,8 +207,7 @@ class ApiClientAuthorization < ArvadosModel
         # below. If so, we'll stuff the database with hmac instead of
         # the real OIDC token.
         upstream_cluster_id = Rails.configuration.Login.LoginCluster
-        token_uuid = upstream_cluster_id + generate_uuid[5..27]
-        secret = hmac
+        stored_secret = hmac
       else
         return nil
       end
@@ -246,6 +246,23 @@ class ApiClientAuthorization < ArvadosModel
 
     remote_user_prefix = remote_user['uuid'][0..4]
 
+    if token_uuid == ''
+      # Use the same UUID as the remote when caching the token.
+      begin
+        remote_token = SafeJSON.load(
+          clnt.get_content('https://' + host + '/arvados/v1/api_client_authorizations/current',
+                           {'remote' => Rails.configuration.ClusterID},
+                           {'Authorization' => 'Bearer ' + token}))
+        token_uuid = remote_token['uuid']
+        if !token_uuid.match(HasUuid::UUID_REGEX) || token_uuid[0..4] != upstream_cluster_id
+          raise "remote cluster #{upstream_cluster_id} returned invalid token uuid #{token_uuid.inspect}"
+        end
+      rescue => e
+        Rails.logger.warn "error getting remote token details for #{token.inspect}: #{e}"
+        return nil
+      end
+    end
+
     # Clusters can only authenticate for their own users.
     if remote_user_prefix != upstream_cluster_id
       Rails.logger.warn "remote authentication rejected: claimed remote user #{remote_user_prefix} but token was issued by #{upstream_cluster_id}"
@@ -328,11 +345,18 @@ class ApiClientAuthorization < ArvadosModel
         auth.user = user
         auth.api_client_id = 0
       end
+      # If stored_secret is set, we save stored_secret in the database
+      # but return the real secret to the caller. This way, if we end
+      # up returning the auth record to the client, they see the same
+      # secret they supplied, instead of the HMAC we saved in the
+      # database.
+      stored_secret = stored_secret || secret
       auth.update_attributes!(user: user,
-                              api_token: secret,
+                              api_token: stored_secret,
                               api_client_id: 0,
                               expires_at: Time.now + Rails.configuration.Login.RemoteTokenRefresh)
-      Rails.logger.debug "cached remote token #{token_uuid} with secret #{secret} in local db"
+      Rails.logger.debug "cached remote token #{token_uuid} with secret #{stored_secret} in local db"
+      auth.api_token = secret
       return auth
     end
 
index a16ee8763f3f32016e76af30f74da1fda86be186..767f035b88cb824c47de95ef9571c5531c228c23 100644 (file)
@@ -1031,6 +1031,90 @@ collection_with_uri_prop:
   properties:
     "http://schema.org/example": "value1"
 
+log_collection:
+  uuid: zzzzz-4zz18-logcollection01
+  current_version_uuid: zzzzz-4zz18-logcollection01
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-10-29T00:51:44.075594000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-10-29T00:51:44.072109000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: a real log collection for a completed container
+
+log_collection2:
+  uuid: zzzzz-4zz18-logcollection02
+  current_version_uuid: zzzzz-4zz18-logcollection02
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-10-29T00:51:44.075594000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-10-29T00:51:44.072109000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: another real log collection for a completed container
+
+diagnostics_request_container_log_collection:
+  uuid: zzzzz-4zz18-diagcompreqlog1
+  current_version_uuid: zzzzz-4zz18-diagcompreqlog1
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-11-02T00:20:44.007557000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-11-02T00:20:44.005381000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: Container log for request zzzzz-xvhdp-diagnostics0001
+
+hasher1_log_collection:
+  uuid: zzzzz-4zz18-dlogcollhash001
+  current_version_uuid: zzzzz-4zz18-dlogcollhash001
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-11-02T00:16:55.272606000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-11-02T00:16:55.267006000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: hasher1 log collection
+
+hasher2_log_collection:
+  uuid: zzzzz-4zz18-dlogcollhash002
+  current_version_uuid: zzzzz-4zz18-dlogcollhash002
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-11-02T00:20:23.547251000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-11-02T00:20:23.545275000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: hasher2 log collection
+
+hasher3_log_collection:
+  uuid: zzzzz-4zz18-dlogcollhash003
+  current_version_uuid: zzzzz-4zz18-dlogcollhash003
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-11-02T00:20:38.789204000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-11-02T00:20:38.787329000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: hasher3 log collection
+
+diagnostics_request_container_log_collection2:
+  uuid: zzzzz-4zz18-diagcompreqlog2
+  current_version_uuid: zzzzz-4zz18-diagcompreqlog2
+  portable_data_hash: 680c855fd6cf2c78778b3728b268925a+475
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2020-11-03T16:17:53.351593000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2020-11-03T16:17:53.346969000Z
+  manifest_text: ". 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n./log\\040for\\040container\\040ce8i5-dz642-h4kd64itncdcz8l 8c12f5f5297b7337598170c6f531fcee+7882 0:0:arv-mount.txt 0:1910:container.json 1910:1264:crunch-run.txt 3174:1005:crunchstat.txt 4179:659:hoststat.txt 4838:2811:node-info.txt 7649:233:node.json 0:0:stderr.txt\n"
+  name: Container log for request zzzzz-xvhdp-diagnostics0002
+
 # Test Helper trims the rest of the file
 
 # Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper
index ea86dca1784834d7ca0c37838c743aa785812a7b..ab0400a67854c47b3967fb84aca44265e5f7f227 100644 (file)
@@ -94,7 +94,7 @@ completed:
   output_path: test
   command: ["echo", "hello"]
   container_uuid: zzzzz-dz642-compltcontainer
-  log_uuid: zzzzz-4zz18-y9vne9npefyxh8g
+  log_uuid: zzzzz-4zz18-logcollection01
   output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
   runtime_constraints:
     vcpus: 1
@@ -115,10 +115,238 @@ completed-older:
   output_path: test
   command: ["arvados-cwl-runner", "echo", "hello"]
   container_uuid: zzzzz-dz642-compltcontainr2
+  log_uuid: zzzzz-4zz18-logcollection02
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
   runtime_constraints:
     vcpus: 1
     ram: 123
 
+completed_diagnostics:
+  name: CWL diagnostics hasher
+  uuid: zzzzz-xvhdp-diagnostics0001
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 1
+  created_at: 2020-11-02T00:03:50.229364000Z
+  modified_at: 2020-11-02T00:20:44.041122000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_path: /var/spool/cwl
+  command: [
+             "arvados-cwl-runner",
+             "--local",
+             "--api=containers",
+             "--no-log-timestamps",
+             "--disable-validate",
+             "--disable-color",
+             "--eval-timeout=20",
+             "--thread-count=1",
+             "--disable-reuse",
+             "--collection-cache-size=256",
+             "--on-error=continue",
+             "/var/lib/cwl/workflow.json#main",
+             "/var/lib/cwl/cwl.input.json"
+           ]
+  container_uuid: zzzzz-dz642-diagcompreq0001
+  log_uuid: zzzzz-4zz18-diagcompreqlog1
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 1
+    ram: 1342177280
+    API: true
+
+completed_diagnostics_hasher1:
+  name: hasher1
+  uuid: zzzzz-xvhdp-diag1hasher0001
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 500
+  created_at: 2020-11-02T00:03:50.229364000Z
+  modified_at: 2020-11-02T00:20:44.041122000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_name: Output for step hasher1
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/9f26a86b6030a69ad222cf67d71c9502+65/hasher-input-file.txt"
+           ]
+  container_uuid: zzzzz-dz642-diagcomphasher1
+  requesting_container_uuid: zzzzz-dz642-diagcompreq0001
+  log_uuid: zzzzz-4zz18-dlogcollhash001
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 1
+    ram: 2684354560
+    API: true
+
+completed_diagnostics_hasher2:
+  name: hasher2
+  uuid: zzzzz-xvhdp-diag1hasher0002
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 500
+  created_at: 2020-11-02T00:17:07.067464000Z
+  modified_at: 2020-11-02T00:20:23.557498000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_name: Output for step hasher2
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/d3a687732e84061f3bae15dc7e313483+62/hasher1.md5sum.txt"
+           ]
+  container_uuid: zzzzz-dz642-diagcomphasher2
+  requesting_container_uuid: zzzzz-dz642-diagcompreq0001
+  log_uuid: zzzzz-4zz18-dlogcollhash002
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 2
+    ram: 2684354560
+    API: true
+
+completed_diagnostics_hasher3:
+  name: hasher3
+  uuid: zzzzz-xvhdp-diag1hasher0003
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 500
+  created_at: 2020-11-02T00:20:30.960251000Z
+  modified_at: 2020-11-02T00:20:38.799377000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_name: Output for step hasher3
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/6bd770f6cf8f83e7647c602eecfaeeb8+62/hasher2.md5sum.txt"
+           ]
+  container_uuid: zzzzz-dz642-diagcomphasher3
+  requesting_container_uuid: zzzzz-dz642-diagcompreq0001
+  log_uuid: zzzzz-4zz18-dlogcollhash003
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 1
+    ram: 2684354560
+    API: true
+
+completed_diagnostics2:
+  name: Copy of CWL diagnostics hasher
+  uuid: zzzzz-xvhdp-diagnostics0002
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 1
+  created_at: 2020-11-03T15:54:30.098485000Z
+  modified_at: 2020-11-03T16:17:53.406809000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_path: /var/spool/cwl
+  command: [
+             "arvados-cwl-runner",
+             "--local",
+             "--api=containers",
+             "--no-log-timestamps",
+             "--disable-validate",
+             "--disable-color",
+             "--eval-timeout=20",
+             "--thread-count=1",
+             "--disable-reuse",
+             "--collection-cache-size=256",
+             "--on-error=continue",
+             "/var/lib/cwl/workflow.json#main",
+             "/var/lib/cwl/cwl.input.json"
+           ]
+  container_uuid: zzzzz-dz642-diagcompreq0002
+  log_uuid: zzzzz-4zz18-diagcompreqlog2
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 1
+    ram: 1342177280
+    API: true
+
+completed_diagnostics_hasher1_reuse:
+  name: hasher1
+  uuid: zzzzz-xvhdp-diag2hasher0001
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 500
+  created_at: 2020-11-02T00:03:50.229364000Z
+  modified_at: 2020-11-02T00:20:44.041122000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_name: Output for step hasher1
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/9f26a86b6030a69ad222cf67d71c9502+65/hasher-input-file.txt"
+           ]
+  container_uuid: zzzzz-dz642-diagcomphasher1
+  requesting_container_uuid: zzzzz-dz642-diagcompreq0002
+  log_uuid: zzzzz-4zz18-dlogcollhash001
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 1
+    ram: 2684354560
+    API: true
+
+completed_diagnostics_hasher2_reuse:
+  name: hasher2
+  uuid: zzzzz-xvhdp-diag2hasher0002
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 500
+  created_at: 2020-11-02T00:17:07.067464000Z
+  modified_at: 2020-11-02T00:20:23.557498000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_name: Output for step hasher2
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/d3a687732e84061f3bae15dc7e313483+62/hasher1.md5sum.txt"
+           ]
+  container_uuid: zzzzz-dz642-diagcomphasher2
+  requesting_container_uuid: zzzzz-dz642-diagcompreq0002
+  log_uuid: zzzzz-4zz18-dlogcollhash002
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 2
+    ram: 2684354560
+    API: true
+
+completed_diagnostics_hasher3_reuse:
+  name: hasher3
+  uuid: zzzzz-xvhdp-diag2hasher0003
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Final
+  priority: 500
+  created_at: 2020-11-02T00:20:30.960251000Z
+  modified_at: 2020-11-02T00:20:38.799377000Z
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  output_name: Output for step hasher3
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/6bd770f6cf8f83e7647c602eecfaeeb8+62/hasher2.md5sum.txt"
+           ]
+  container_uuid: zzzzz-dz642-diagcomphasher3
+  requesting_container_uuid: zzzzz-dz642-diagcompreq0002
+  log_uuid: zzzzz-4zz18-dlogcollhash003
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  runtime_constraints:
+    vcpus: 1
+    ram: 2684354560
+    API: true
+
 requester:
   uuid: zzzzz-xvhdp-9zacv3o1xw6sxz5
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
@@ -309,7 +537,7 @@ completed_with_input_mounts:
     vcpus: 1
     ram: 123
   container_uuid: zzzzz-dz642-compltcontainer
-  log_uuid: zzzzz-4zz18-y9vne9npefyxh8g
+  log_uuid: zzzzz-4zz18-logcollection01
   output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
   mounts:
     /var/lib/cwl/cwl.input.json:
@@ -758,7 +986,7 @@ cr_in_trashed_project:
   output_path: test
   command: ["echo", "hello"]
   container_uuid: zzzzz-dz642-compltcontainer
-  log_uuid: zzzzz-4zz18-y9vne9npefyxh8g
+  log_uuid: zzzzz-4zz18-logcollection01
   output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
   runtime_constraints:
     vcpus: 1
index f18adb5dbd7d1ad56c2575984a668240e21479a6..b7d082771a0b37f2c3760bae23c46591805e07ef 100644 (file)
@@ -126,6 +126,153 @@ completed_older:
   secret_mounts: {}
   secret_mounts_md5: 99914b932bd37a50b983c5e7c90ae93b
 
+# Container fixture in state Complete (exit_code 0) whose command is
+# arvados-cwl-runner with --api=containers, run against
+# /var/lib/cwl/workflow.json#main and /var/lib/cwl/cwl.input.json.
+diagnostics_completed_requester:
+  uuid: zzzzz-dz642-diagcompreq0001
+  owner_uuid: zzzzz-tpzed-000000000000000
+  state: Complete
+  exit_code: 0
+  priority: 562948349145881771
+  created_at: 2020-11-02T00:03:50.192697000Z
+  modified_at: 2020-11-02T00:20:43.987275000Z
+  started_at: 2020-11-02T00:08:07.186711000Z
+  finished_at: 2020-11-02T00:20:43.975416000Z
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  log: 6129e376cb05c942f75a0c36083383e8+244
+  output: 1f4b0bc7583c2a7f9102c395f4ffc5e3+45
+  output_path: /var/spool/cwl
+  command: [
+             "arvados-cwl-runner",
+             "--local",
+             "--api=containers",
+             "--no-log-timestamps",
+             "--disable-validate",
+             "--disable-color",
+             "--eval-timeout=20",
+             "--thread-count=1",
+             "--disable-reuse",
+             "--collection-cache-size=256",
+             "--on-error=continue",
+             "/var/lib/cwl/workflow.json#main",
+             "/var/lib/cwl/cwl.input.json"
+           ]
+  runtime_constraints:
+    API: true
+    keep_cache_ram: 268435456
+    ram: 1342177280
+    vcpus: 1
+
+# Container fixture in state Complete (exit_code 0) that ran md5sum on
+# hasher-input-file.txt. Its output, d3a687732e84061f3bae15dc7e313483+62,
+# is the collection read by diagnostics_completed_hasher2's command.
+diagnostics_completed_hasher1:
+  uuid: zzzzz-dz642-diagcomphasher1
+  owner_uuid: zzzzz-tpzed-000000000000000
+  state: Complete
+  exit_code: 0
+  priority: 562948349145881771
+  created_at: 2020-11-02T00:08:18.829222000Z
+  modified_at: 2020-11-02T00:16:55.142023000Z
+  started_at: 2020-11-02T00:16:52.375871000Z
+  finished_at: 2020-11-02T00:16:55.105985000Z
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  log: fed8fb19fe8e3a320c29fed0edab12dd+220
+  output: d3a687732e84061f3bae15dc7e313483+62
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/9f26a86b6030a69ad222cf67d71c9502+65/hasher-input-file.txt"
+           ]
+  runtime_constraints:
+    API: true
+    keep_cache_ram: 268435456
+    ram: 268435456
+    vcpus: 1
+
+# Container fixture in state Complete (exit_code 0) that ran md5sum on
+# hasher1.md5sum.txt. Its output, 6bd770f6cf8f83e7647c602eecfaeeb8+62,
+# is the collection read by diagnostics_completed_hasher3's command.
+# vcpus is 2 here; the hasher1 and hasher3 container fixtures use 1.
+diagnostics_completed_hasher2:
+  uuid: zzzzz-dz642-diagcomphasher2
+  owner_uuid: zzzzz-tpzed-000000000000000
+  state: Complete
+  exit_code: 0
+  priority: 562948349145881771
+  created_at: 2020-11-02T00:17:07.026493000Z
+  modified_at: 2020-11-02T00:20:23.505908000Z
+  started_at: 2020-11-02T00:20:21.513185000Z
+  finished_at: 2020-11-02T00:20:23.478317000Z
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  log: 4fc03b95fc2646b0dec7383dbb7d56d8+221
+  output: 6bd770f6cf8f83e7647c602eecfaeeb8+62
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/d3a687732e84061f3bae15dc7e313483+62/hasher1.md5sum.txt"
+           ]
+  runtime_constraints:
+    API: true
+    keep_cache_ram: 268435456
+    ram: 268435456
+    vcpus: 2
+
+# Container fixture in state Complete (exit_code 0) that ran md5sum on
+# hasher2.md5sum.txt, read from the collection that
+# diagnostics_completed_hasher2 lists as its output.
+diagnostics_completed_hasher3:
+  uuid: zzzzz-dz642-diagcomphasher3
+  owner_uuid: zzzzz-tpzed-000000000000000
+  state: Complete
+  exit_code: 0
+  priority: 562948349145881771
+  created_at: 2020-11-02T00:20:30.943856000Z
+  modified_at: 2020-11-02T00:20:38.746541000Z
+  started_at: 2020-11-02T00:20:36.748957000Z
+  finished_at: 2020-11-02T00:20:38.732199000Z
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  log: 1eeaf70de0f65b1346e54c59f09e848d+210
+  output: 11b5fdaa380102e760c3eb6de80a9876+62
+  output_path: /var/spool/cwl
+  command: [
+             "md5sum",
+             "/keep/6bd770f6cf8f83e7647c602eecfaeeb8+62/hasher2.md5sum.txt"
+           ]
+  runtime_constraints:
+    API: true
+    keep_cache_ram: 268435456
+    ram: 268435456
+    vcpus: 1
+
+# Second arvados-cwl-runner container fixture (state Complete,
+# exit_code 0) with the same command as diagnostics_completed_requester.
+# Its uuid is the requesting_container_uuid of the
+# completed_diagnostics_hasher2_reuse and
+# completed_diagnostics_hasher3_reuse fixtures.
+diagnostics_completed_requester2:
+  uuid: zzzzz-dz642-diagcompreq0002
+  owner_uuid: zzzzz-tpzed-000000000000000
+  state: Complete
+  exit_code: 0
+  priority: 1124295487972526
+  created_at: 2020-11-03T15:54:36.504661000Z
+  modified_at: 2020-11-03T16:17:53.242868000Z
+  started_at: 2020-11-03T16:09:51.123659000Z
+  finished_at: 2020-11-03T16:17:53.220358000Z
+  container_image: d967ef4a1ca90a096a39f5ce68e4a2e7+261
+  cwd: /var/spool/cwl
+  log: f1933bf5191f576613ea7f65bd0ead53+244
+  output: 941b71a57208741ce8742eca62352fb1+123
+  output_path: /var/spool/cwl
+  command: [
+             "arvados-cwl-runner",
+             "--local",
+             "--api=containers",
+             "--no-log-timestamps",
+             "--disable-validate",
+             "--disable-color",
+             "--eval-timeout=20",
+             "--thread-count=1",
+             "--disable-reuse",
+             "--collection-cache-size=256",
+             "--on-error=continue",
+             "/var/lib/cwl/workflow.json#main",
+             "/var/lib/cwl/cwl.input.json"
+           ]
+  runtime_constraints:
+    API: true
+    keep_cache_ram: 268435456
+    ram: 1342177280
+    vcpus: 1
+
 requester:
   uuid: zzzzz-dz642-requestingcntnr
   owner_uuid: zzzzz-tpzed-000000000000000
index 4115482d809974648e9cf99ea2be7800a829b45f..a5899ce8a7cc0809a57b64a9588d8e227846c274 100644 (file)
@@ -202,7 +202,7 @@ var containerUuidPattern = regexp.MustCompile(`^[a-z0-9]{5}-dz642-[a-z0-9]{15}$`
 // Cancelled or Complete. See https://dev.arvados.org/issues/10979
 func (disp *Dispatcher) checkSqueueForOrphans() {
        for _, uuid := range disp.sqCheck.All() {
-               if !containerUuidPattern.MatchString(uuid) {
+               if !containerUuidPattern.MatchString(uuid) || !strings.HasPrefix(uuid, disp.cluster.ClusterID) {
                        continue
                }
                err := disp.TrackContainer(uuid)
index 49fb2456f5851662bec9573af6e06978d930d741..373fd9a25d56608d1c418da45221836b1f5cdb16 100644 (file)
@@ -16,6 +16,7 @@ import (
        "net/url"
        "os"
        "path/filepath"
+       "regexp"
        "sort"
        "strconv"
        "strings"
@@ -111,6 +112,21 @@ func s3signature(secretKey, scope, signedHeaders, stringToSign string) (string,
        return hashdigest(hmac.New(sha256.New, key), stringToSign), nil
 }
 
+// v2tokenUnderscore matches the beginning of a key of the form
+// "v2_", then a token UUID (5 characters, "-gj3su-", 15 characters),
+// then another "_".
+var v2tokenUnderscore = regexp.MustCompile(`^v2_[a-z0-9]{5}-gj3su-[a-z0-9]{15}_`)
+
+// unescapeKey converts an S3 access key back into the token string it
+// encodes. A key matching v2tokenUnderscore has every "_" replaced by
+// "/". Any other key is passed through url.PathUnescape; if that
+// fails, the key is returned unchanged.
+func unescapeKey(key string) string {
+       if v2tokenUnderscore.MatchString(key) {
+               // Entire Arvados token, with "/" replaced by "_" to
+               // avoid colliding with the Authorization header
+               // format.
+               return strings.Replace(key, "_", "/", -1)
+       } else if s, err := url.PathUnescape(key); err == nil {
+               // Key was valid percent-encoding (or contained none).
+               return s
+       } else {
+               // Not decodable: use the key exactly as received.
+               return key
+       }
+}
+
 // checks3signature verifies the given S3 V4 signature and returns the
 // Arvados token that corresponds to the given accessKey. An error is
 // returned if accessKey is not a valid token UUID or the signature
@@ -152,7 +168,7 @@ func (h *handler) checks3signature(r *http.Request) (string, error) {
        } else {
                // Access key and secret key are both an entire
                // Arvados token or OIDC access token.
-               ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+key)
+               ctx := arvados.ContextWithAuthorization(r.Context(), "Bearer "+unescapeKey(key))
                err = client.RequestAndDecodeContext(ctx, &aca, "GET", "arvados/v1/api_client_authorizations/current", nil, nil)
                secret = key
        }
@@ -170,7 +186,7 @@ func (h *handler) checks3signature(r *http.Request) (string, error) {
        } else if expect != signature {
                return "", fmt.Errorf("signature does not match (scope %q signedHeaders %q stringToSign %q)", scope, signedHeaders, stringToSign)
        }
-       return secret, nil
+       return aca.TokenV2(), nil
 }
 
 // serveS3 handles r and returns true if r is a request from an S3
@@ -183,7 +199,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        http.Error(w, "malformed Authorization header", http.StatusUnauthorized)
                        return true
                }
-               token = split[0]
+               token = unescapeKey(split[0])
        } else if strings.HasPrefix(auth, s3SignAlgorithm+" ") {
                t, err := h.checks3signature(r)
                if err != nil {
@@ -205,7 +221,15 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        fs := client.SiteFileSystem(kc)
        fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
 
-       objectNameGiven := strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
+       var objectNameGiven bool
+       fspath := "/by_id"
+       if id := parseCollectionIDFromDNSName(r.Host); id != "" {
+               fspath += "/" + id
+               objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 0
+       } else {
+               objectNameGiven = strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
+       }
+       fspath += r.URL.Path
 
        switch {
        case r.Method == http.MethodGet && !objectNameGiven:
@@ -221,7 +245,6 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                }
                return true
        case r.Method == http.MethodGet || r.Method == http.MethodHead:
-               fspath := "/by_id" + r.URL.Path
                fi, err := fs.Stat(fspath)
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
@@ -255,7 +278,6 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        http.Error(w, "missing object name in PUT request", http.StatusBadRequest)
                        return true
                }
-               fspath := "by_id" + r.URL.Path
                var objectIsDir bool
                if strings.HasSuffix(fspath, "/") {
                        if !h.Config.cluster.Collections.S3FolderObjects {
@@ -350,7 +372,6 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        http.Error(w, "missing object name in DELETE request", http.StatusBadRequest)
                        return true
                }
-               fspath := "by_id" + r.URL.Path
                if strings.HasSuffix(fspath, "/") {
                        fspath = strings.TrimSuffix(fspath, "/")
                        fi, err := fs.Stat(fspath)
index 786e68afec4ca197980b56270e3f0bc66ab7494d..bff197dede52e90c117e2d1e830841f4cea01d65 100644 (file)
@@ -10,6 +10,7 @@ import (
        "fmt"
        "io/ioutil"
        "net/http"
+       "net/url"
        "os"
        "os/exec"
        "strings"
@@ -118,11 +119,15 @@ func (s *IntegrationSuite) TestS3Signatures(c *check.C) {
                secretkey string
        }{
                {true, aws.V2Signature, arvadostest.ActiveToken, "none"},
+               {true, aws.V2Signature, url.QueryEscape(arvadostest.ActiveTokenV2), "none"},
+               {true, aws.V2Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), "none"},
                {false, aws.V2Signature, "none", "none"},
                {false, aws.V2Signature, "none", arvadostest.ActiveToken},
 
                {true, aws.V4Signature, arvadostest.ActiveTokenUUID, arvadostest.ActiveToken},
                {true, aws.V4Signature, arvadostest.ActiveToken, arvadostest.ActiveToken},
+               {true, aws.V4Signature, url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2)},
+               {true, aws.V4Signature, strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1), strings.Replace(arvadostest.ActiveTokenV2, "/", "_", -1)},
                {false, aws.V4Signature, arvadostest.ActiveToken, ""},
                {false, aws.V4Signature, arvadostest.ActiveToken, "none"},
                {false, aws.V4Signature, "none", arvadostest.ActiveToken},
@@ -700,3 +705,12 @@ func (s *IntegrationSuite) TestS3cmd(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(string(buf), check.Matches, `.* 3 +s3://`+arvadostest.FooCollection+`/foo\n`)
 }
+
+// TestS3BucketInHost requests /sailboat.txt with the collection UUID
+// given as the leading label of the host name
+// ("<uuid>.collections.example.com") rather than in the URL path, and
+// expects a 200 response with the file's content.
+func (s *IntegrationSuite) TestS3BucketInHost(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       // The auth argument contains a space, so runCurl sends it
+       // verbatim as the Authorization header value.
+       hdr, body, _ := s.runCurl(c, "AWS "+arvadostest.ActiveTokenV2+":none", stage.coll.UUID+".collections.example.com", "/sailboat.txt")
+       c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+       c.Check(body, check.Equals, "⛵\n")
+}
index acdc11b305335fd25afe3fed4c27122c3488c84f..43817b51fcc78adaefe6525798b8ef400dbe2512 100644 (file)
@@ -257,12 +257,16 @@ func (s *IntegrationSuite) Test200(c *check.C) {
 }
 
 // Return header block and body.
-func (s *IntegrationSuite) runCurl(c *check.C, token, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
+func (s *IntegrationSuite) runCurl(c *check.C, auth, host, uri string, args ...string) (hdr, bodyPart string, bodySize int64) {
        curlArgs := []string{"--silent", "--show-error", "--include"}
        testHost, testPort, _ := net.SplitHostPort(s.testServer.Addr)
        curlArgs = append(curlArgs, "--resolve", host+":"+testPort+":"+testHost)
-       if token != "" {
-               curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+       if strings.Contains(auth, " ") {
+               // caller supplied entire Authorization header value
+               curlArgs = append(curlArgs, "-H", "Authorization: "+auth)
+       } else if auth != "" {
+               // caller supplied Arvados token
+               curlArgs = append(curlArgs, "-H", "Authorization: Bearer "+auth)
        }
        curlArgs = append(curlArgs, args...)
        curlArgs = append(curlArgs, "http://"+host+":"+testPort+uri)