Merge branch '13645-fix-arvados-api-host-insecure'
author Fuad Muhic <fmuhic@capeannenterprises.com>
Thu, 19 Jul 2018 14:59:19 +0000 (16:59 +0200)
committer Fuad Muhic <fmuhic@capeannenterprises.com>
Thu, 19 Jul 2018 14:59:19 +0000 (16:59 +0200)
refs #13645

Arvados-DCO-1.1-Signed-off-by: Fuad Muhic <fmuhic@capeannenterprises.com>

126 files changed:
apps/workbench/Gemfile.lock
apps/workbench/app/controllers/work_units_controller.rb
apps/workbench/app/helpers/application_helper.rb
apps/workbench/app/helpers/version_helper.rb
apps/workbench/app/views/application/_report_issue_popup.html.erb
apps/workbench/config/application.default.yml
apps/workbench/lib/app_version.rb
build/run-library.sh
build/run-tests.sh
doc/_config.yml
doc/_includes/_container_scheduling_parameters.liquid
doc/_includes/_install_compute_docker.liquid
doc/admin/activation.html.textile.liquid [new file with mode: 0644]
doc/admin/spot-instances.html.textile.liquid [new file with mode: 0644]
doc/api/methods/groups.html.textile.liquid
doc/api/tokens.html.textile.liquid
doc/install/arvados-on-kubernetes-GKE.html.textile.liquid [new file with mode: 0644]
doc/install/arvados-on-kubernetes-minikube.html.textile.liquid [new file with mode: 0644]
doc/install/arvados-on-kubernetes.html.textile.liquid [new file with mode: 0644]
doc/install/create-standard-objects.html.textile.liquid [deleted file]
doc/install/crunch2-slurm/install-dispatch.html.textile.liquid
doc/install/crunch2-slurm/install-slurm.html.textile.liquid
doc/install/index.html.textile.liquid
doc/install/install-arv-git-httpd.html.textile.liquid
doc/install/install-components.html.textile.liquid [new file with mode: 0644]
doc/install/install-composer.html.textile.liquid [new file with mode: 0644]
doc/install/install-keep-balance.html.textile.liquid
doc/install/install-keepproxy.html.textile.liquid
doc/install/install-keepstore.html.textile.liquid
doc/install/install-manual-prerequisites.html.textile.liquid
doc/install/install-nodemanager.html.textile.liquid
doc/user/cwl/cwl-extensions.html.textile.liquid
lib/cmd/cmd.go
lib/controller/handler.go
lib/controller/handler_test.go
lib/dispatchcloud/node_size.go
lib/dispatchcloud/node_size_test.go
sdk/R/R/zzz.R [new file with mode: 0644]
sdk/R/README.Rmd
sdk/cli/arvados-cli.gemspec
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arv-cwl-schema.yml
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/context.py
sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/arvados_cwl/util.py [new file with mode: 0644]
sdk/cwl/setup.py
sdk/cwl/tests/collection_per_tool/collection_per_tool_packed.cwl
sdk/cwl/tests/makes_intermediates/echo.cwl [new file with mode: 0644]
sdk/cwl/tests/makes_intermediates/hello1.txt [new file with mode: 0644]
sdk/cwl/tests/makes_intermediates/run_in_single.cwl [new file with mode: 0644]
sdk/cwl/tests/makes_intermediates/subwf.cwl [new file with mode: 0644]
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/test_util.py [new file with mode: 0644]
sdk/cwl/tests/wf/expect_packed.cwl
sdk/cwl/tests/wf/submit_wf_runner_resources.cwl [new file with mode: 0644]
sdk/go/arvados/byte_size.go [new file with mode: 0644]
sdk/go/arvados/byte_size_test.go [new file with mode: 0644]
sdk/go/arvados/config.go
sdk/go/arvados/config_test.go [new file with mode: 0644]
sdk/go/arvados/container.go
sdk/python/arvados/__init__.py
sdk/python/arvados/api.py
sdk/python/arvados/commands/keepdocker.py
sdk/python/arvados/keep.py
sdk/python/arvados/safeapi.py
sdk/python/setup.py
sdk/python/tests/nginx.conf
sdk/python/tests/run_test_server.py
sdk/python/tests/test_keep_client.py
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/models/api_client_authorization.rb
services/api/app/models/container_request.rb
services/api/app/models/user.rb
services/api/config/application.default.yml
services/api/config/initializers/oj_mimic_json.rb [new file with mode: 0644]
services/api/config/initializers/time_format.rb
services/api/lib/app_version.rb
services/api/lib/crunch_dispatch.rb
services/api/lib/safe_json.rb
services/api/lib/whitelist_update.rb
services/api/test/functional/arvados/v1/schema_controller_test.rb
services/api/test/integration/remote_user_test.rb
services/api/test/test_helper.rb
services/api/test/unit/container_request_test.rb
services/api/test/unit/user_test.rb
services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
services/crunch-dispatch-slurm/squeue.go
services/crunch-dispatch-slurm/squeue_test.go
services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go
services/crunch-run/logging_test.go
services/keep-web/cache.go
services/keep-web/cadaver_test.go
services/keep-web/handler.go
services/keep-web/handler_test.go
services/keepproxy/keepproxy_test.go
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/azure.py
services/nodemanager/arvnodeman/computenode/driver/dummy.py
services/nodemanager/arvnodeman/computenode/driver/ec2.py
services/nodemanager/arvnodeman/computenode/driver/gce.py
services/nodemanager/arvnodeman/config.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/arvnodeman/jobqueue.py
services/nodemanager/arvnodeman/launcher.py
services/nodemanager/arvnodeman/nodelist.py
services/nodemanager/doc/azure.example.cfg
services/nodemanager/doc/ec2.example.cfg
services/nodemanager/doc/gce.example.cfg
services/nodemanager/setup.py
services/nodemanager/tests/integration_test.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_computenode_dispatch_slurm.py
services/nodemanager/tests/test_daemon.py
services/nodemanager/tests/test_jobqueue.py
services/nodemanager/tests/test_nodelist.py
vendor/vendor.json

index 8868a2b0291357c90df5f1936d0fa183857a079c..06460ad06c1487d1d0c2e08978f36c644de95624 100644 (file)
@@ -186,7 +186,7 @@ GEM
       mini_portile2 (~> 2.3.0)
     npm-rails (0.2.1)
       rails (>= 3.2)
-    oj (3.5.0)
+    oj (3.6.4)
     os (0.9.6)
     passenger (5.2.1)
       rack
@@ -276,7 +276,7 @@ GEM
     simplecov-html (0.10.2)
     simplecov-rcov (0.2.3)
       simplecov (>= 0.4.1)
-    sprockets (3.7.1)
+    sprockets (3.7.2)
       concurrent-ruby (~> 1.0)
       rack (> 1, < 3)
     sprockets-rails (3.2.1)
@@ -358,4 +358,4 @@ DEPENDENCIES
   wiselinks
 
 BUNDLED WITH
-   1.16.1
+   1.16.2
index 0b0cdb4c3261274f1d74bd6bb9e97273a9f097b9..8527b4d48cb717b941ab376b68255e917c5797a3 100644 (file)
@@ -95,14 +95,33 @@ class WorkUnitsController < ApplicationController
       attrs['cwd'] = "/var/spool/cwl"
       attrs['output_path'] = "/var/spool/cwl"
 
+      # runtime constraints
+      runtime_constraints = {
+        "vcpus" => 1,
+        "ram" => 1024 * 1024 * 1024,
+        "API" => true
+      }
+
       input_defaults = {}
       if wf_json
-        inputs = get_cwl_inputs(wf_json)
-        inputs.each do |input|
+        main = get_cwl_main(wf_json)
+        main[:inputs].each do |input|
           if input[:default]
             input_defaults[cwl_shortname(input[:id])] = input[:default]
           end
         end
+        if main[:hints]
+          main[:hints].each do |hint|
+            if hint[:class] == "http://arvados.org/cwl#WorkflowRunnerResources"
+              if hint[:coresMin]
+                runtime_constraints["vcpus"] = hint[:coresMin]
+              end
+              if hint[:ramMin]
+                runtime_constraints["ram"] = hint[:ramMin] * 1024 * 1024
+              end
+            end
+          end
+        end
       end
 
       # mounts
@@ -128,12 +147,6 @@ class WorkUnitsController < ApplicationController
       end
       attrs['mounts'] = mounts
 
-      # runtime constriants
-      runtime_constraints = {
-        "vcpus" => 1,
-        "ram" => 256000000,
-        "API" => true
-      }
       attrs['runtime_constraints'] = runtime_constraints
     else
       raise ArgumentError, "Unsupported template uuid: #{template_uuid}"
index 57b8d8780c6859e9063cabb7c43cfcb30a14d6bf..106716a0f72f178e826afc6eaaf2908ecb8afe0a 100644 (file)
@@ -426,18 +426,23 @@ module ApplicationHelper
     lt
   end
 
-  def get_cwl_inputs(workflow)
-    if workflow[:inputs]
-      return workflow[:inputs]
+  def get_cwl_main(workflow)
+    if workflow[:"$graph"].nil?
+      return workflow
     else
       workflow[:"$graph"].each do |tool|
         if tool[:id] == "#main"
-          return tool[:inputs]
+          return tool
         end
       end
     end
   end
 
+  def get_cwl_inputs(workflow)
+    get_cwl_main(workflow)[:inputs]
+  end
+
+
   def cwl_shortname(id)
     if id[0] == "#"
       id = id[1..-1]
index 915c3a9d381984abec802e78680f84a23ec54b6e..e673c812102143d451fa48887b4cdf9d28e060a6 100644 (file)
@@ -9,6 +9,12 @@ module VersionHelper
     arvados_api_client.discovery[:source_version]
   end
 
+  # Get the packageVersion given in the API server's discovery
+  # document.
+  def api_package_version
+    arvados_api_client.discovery[:packageVersion]
+  end
+
   # URL for browsing source code for the given version.
   def version_link_target version
     "https://arvados.org/projects/arvados/repository/changes?rev=#{version.sub(/-.*/, "")}"
index 86d550a33f34dc8bbccceb556164c76c5bf8a03b..8823fdd5f78f8ebfe7c4a336c3a144bc479de26f 100644 (file)
@@ -14,8 +14,10 @@ SPDX-License-Identifier: AGPL-3.0 %>
   additional_info_str = additional_info.map {|k,v| "#{k}=#{v}"}.join("\n")
 
   additional_info['api_source_version'] = api_source_version
+  additional_info['api_package_version'] = api_package_version
   additional_info['generated_at'] = generated_at
   additional_info['workbench_version'] = AppVersion.hash
+  additional_info['workbench_package_version'] = AppVersion.package_version
   additional_info['arvados_base'] = arvados_base
   additional_info['support_email'] = support_email
   additional_info['error_message'] = params[:error_message] if params[:error_message]
@@ -73,7 +75,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
           <label for="wb_version" class="col-sm-4 control-label"> Workbench version </label>
           <div class="col-sm-8">
             <p class="form-control-static" name="wb_version">
-              <%= link_to AppVersion.hash, version_link_target(AppVersion.hash) %>
+              <%= AppVersion.package_version %> (<%= link_to AppVersion.hash, version_link_target(AppVersion.hash) %>)
             </p>
           </div>
         </div>
@@ -82,7 +84,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
           <label for="server_version" class="col-sm-4 control-label"> API version </label>
           <div class="col-sm-8">
             <p class="form-control-static" name="server_version">
-              <%= link_to api_source_version, version_link_target(api_source_version) %>
+              <%= api_package_version %> (<%= link_to api_source_version, version_link_target(api_source_version) %>)
             </p>
           </div>
         </div>
index 0946a9ddaf1728aa2e018100237d0b7ae568d6e1..e4ec4131286dac66d9a12947ad6d0ddd6bbad358 100644 (file)
@@ -72,6 +72,7 @@ production:
   i18n.fallbacks: true
   active_support.deprecation: :notify
   profiling_enabled: false
+  log_level: info
 
   arvados_insecure_https: false
 
@@ -200,6 +201,11 @@ common:
   # "git log".
   source_version: false
 
+  # Override the automatic package string. With the default value of
+  # false, the package string is read from package-build.version in
+  # Rails.root (included in vendor packages).
+  package_version: false
+
   # report notification to and from addresses
   issue_reporter_email_from: arvados@example.com
   issue_reporter_email_to: arvados@example.com
index cc4b4dee1928f0a6b278d80c9887999cbc6c2f5d..9db76e25728da4e4127ed68cc8064c1d3a4f5d8c 100644 (file)
@@ -15,6 +15,7 @@ class AppVersion
 
   def self.forget
     @hash = nil
+    @package_version = nil
   end
 
   # Return abbrev commit hash for current code version: "abc1234", or
@@ -54,4 +55,18 @@ class AppVersion
 
     @hash || "unknown"
   end
+
+  def self.package_version
+    if (cached = Rails.configuration.package_version || @package_version)
+      return cached
+    end
+
+    begin
+      @package_version = IO.read(Rails.root.join("package-build.version")).strip
+    rescue Errno::ENOENT
+      @package_version = "unknown"
+    end
+
+    @package_version
+  end
 end
index 4b18d037b6b30655714ed174fad84b665ebe7f9f..c5a73cbe35a6116fdbed0b8f364f2af4f0e83df5 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/bash -xe
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: AGPL-3.0
@@ -310,6 +310,7 @@ handle_rails_package() {
     cd "$srcdir"
     local license_path="$1"; shift
     local version="$(version_from_git)"
+    echo "$version" >package-build.version
     local scripts_dir="$(mktemp --tmpdir -d "$pkgname-XXXXXXXX.scripts")" && \
     (
         set -e
index 4ec5c08d5f8c0070f120c6300fd1e417bd179827..636c0306ca94a7948b1e4a63302ae467ca7aea37 100755 (executable)
@@ -619,7 +619,8 @@ fi
 # Jenkins config requires that glob tmp/*.log match something. Ensure
 # that happens even if we don't end up running services that set up
 # logging.
-touch "${WORKSPACE}/tmp/controller.log"
+mkdir -p "${WORKSPACE}/tmp/" || fatal "could not mkdir ${WORKSPACE}/tmp"
+touch "${WORKSPACE}/tmp/controller.log" || fatal "could not touch ${WORKSPACE}/tmp/controller.log"
 
 retry() {
     remain="${repeat}"
index a64ff8aced868b7a3c6d6317268e5acdfa84d022..075111d921602bb1a959a2fedaa3bbc747ebb863 100644 (file)
@@ -151,24 +151,25 @@ navbar:
       - install/cheat_sheet.html.textile.liquid
       - user/topics/arvados-sync-groups.html.textile.liquid
       - admin/storage-classes.html.textile.liquid
+      - admin/activation.html.textile.liquid
       - admin/migrating-providers.html.textile.liquid
       - admin/merge-remote-account.html.textile.liquid
+      - admin/spot-instances.html.textile.liquid
       - install/migrate-docker19.html.textile.liquid
   installguide:
     - Overview:
       - install/index.html.textile.liquid
     - Docker quick start:
       - install/arvbox.html.textile.liquid
+    - Arvados on Kubernetes:
+      - install/arvados-on-kubernetes.html.textile.liquid
     - Manual installation:
       - install/install-manual-prerequisites.html.textile.liquid
+      - install/install-components.html.textile.liquid
+    - Core:
       - install/install-postgresql.html.textile.liquid
-      - install/install-sso.html.textile.liquid
       - install/install-api-server.html.textile.liquid
-      - install/install-ws.html.textile.liquid
-      - install/install-arv-git-httpd.html.textile.liquid
-      - install/install-workbench-app.html.textile.liquid
-      - install/install-shell-server.html.textile.liquid
-      - install/create-standard-objects.html.textile.liquid
+    - Keep:
       - install/install-keepstore.html.textile.liquid
       - install/configure-fs-storage.html.textile.liquid
       - install/configure-s3-object-storage.html.textile.liquid
@@ -176,6 +177,14 @@ navbar:
       - install/install-keepproxy.html.textile.liquid
       - install/install-keep-web.html.textile.liquid
       - install/install-keep-balance.html.textile.liquid
+    - User interface:
+      - install/install-sso.html.textile.liquid
+      - install/install-workbench-app.html.textile.liquid
+      - install/install-composer.html.textile.liquid
+    - Additional services:
+      - install/install-ws.html.textile.liquid
+      - install/install-shell-server.html.textile.liquid
+      - install/install-arv-git-httpd.html.textile.liquid
     - Containers API support on SLURM:
       - install/crunch2-slurm/install-prerequisites.html.textile.liquid
       - install/crunch2-slurm/install-slurm.html.textile.liquid
index 6eee4e0447c9715c3f88e3da07e003124ad8f001..abbe6f4c06adef5c7f8826d3e3430ea9386278e0 100644 (file)
@@ -11,3 +11,5 @@ Parameters to be passed to the container scheduler (e.g., SLURM) when running a
 table(table table-bordered table-condensed).
 |_. Key|_. Type|_. Description|_. Notes|
 |partitions|array of strings|The names of one or more compute partitions that may run this container. If not provided, the system will choose where to run the container.|Optional.|
+|preemptible|boolean|If true, the dispatcher will ask for a preemptible cloud node instance (e.g. an AWS Spot Instance) to run this container.|Optional. Default is false. See the example below this table.|
+|max_run_time|integer|Maximum running time (in seconds) that this container will be allowed to run before being cancelled.|Optional. Default is 0 (no limit).|
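+
+For example, the @scheduling_parameters@ attribute of a container request asking for a preemptible node with a one-hour run time limit might look like the following (the values shown are purely illustrative):
+
+<pre>
+"scheduling_parameters": {
+  "preemptible": true,
+  "max_run_time": 3600
+}
+</pre>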
index 18347785cd07d018b66247af7a90807a6630e2ec..6a1a7318650ceeb0cfd83436b23c55120e759267 100644 (file)
@@ -49,3 +49,30 @@ On Red Hat-based systems, run:
 </notextile>
 
 Finally, reboot the system to make these changes effective.
+
+h2. Create a project for Docker images
+
+Here we create a default project for the standard Arvados Docker images, and give all users read access to it. The project is owned by the system user.
+
+<notextile>
+<pre><code>~$ <span class="userinput">project_uuid=`arv --format=uuid group create --group "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"Arvados Standard Docker Images\"}"`</span>
+~$ <span class="userinput">echo "Arvados project uuid is '$project_uuid'"</span>
+~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
+<span class="userinput">{
+ "tail_uuid":"$all_users_group_uuid",
+ "head_uuid":"$project_uuid",
+ "link_class":"permission",
+ "name":"can_read"
+}
+EOF</span>
+</code></pre></notextile>
+
+h2. Download and tag the latest arvados/jobs docker image
+
+In order to start workflows from workbench, there needs to be a Docker image tagged @arvados/jobs:latest@. The following command downloads the latest arvados/jobs image from Docker Hub, loads it into Keep, and tags it as 'latest'.  In this example @$project_uuid@ should be the UUID of the "Arvados Standard Docker Images" project.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-keepdocker --project-uuid $project_uuid --pull arvados/jobs latest</span>
+</code></pre></notextile>
+
+If the image needs to be downloaded from Docker Hub, the command can take a few minutes to complete, depending on available network bandwidth.
diff --git a/doc/admin/activation.html.textile.liquid b/doc/admin/activation.html.textile.liquid
new file mode 100644 (file)
index 0000000..4a08e50
--- /dev/null
@@ -0,0 +1,229 @@
+---
+layout: default
+navsection: admin
+title: User activation
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+This page describes how new users are created and activated.
+
+"Browser login and management of API tokens is described here.":{{site.baseurl}}/api/tokens.html
+
+h3. Authentication
+
+After completing the authentication process, a callback is made from the SSO server to the API server, providing a user record and @identity_url@ (despite the name, this is actually an Arvados user uuid).
+
+The API server searches for a user record with the @identity_url@ supplied by the SSO.  If found, that user account will be used, unless the account has @redirect_to_user_uuid@ set, in which case it will use the user in @redirect_to_user_uuid@ instead (this is used for the "link account":{{site.baseurl}}/user/topics/link-accounts.html feature).
+
+Next, it searches by email address for a "pre-activated account.":#pre-activated
+
+If no existing user record is found, a new user object will be created.
+
+A federated user follows a slightly different flow: a special token is presented, and the API server verifies the user's identity with the home cluster. This flow also results in a user object (representing the remote user) being created.
+
+h3. User setup
+
+If @auto_setup_new_users@ is true, as part of creating the new user object, the user is immediately set up with:
+
+* A @can_login@ @permission@ link (email address &rarr; user uuid) which records @identity_url_prefix@
+* Membership in the "All users" group (can read all users, all users can see new user)
+* A new git repo and @can_manage@ permission if @auto_setup_new_users_with_repository@ is true
+* @can_login@ permission to a shell node if @auto_setup_new_users_with_vm_uuid@ is set to the uuid of a vm
+
+Otherwise, an admin must explicitly invoke "setup" on the user via workbench or the API.
+
+h3. User activation
+
+A newly created user is inactive (@is_active@ is false) by default, unless @new_users_are_active@ is true.
+
+An inactive user cannot create or update any object, but can read Arvados objects that the user account has permission to read.  This implies that if @auto_setup_new_users@ is true, an "inactive" user who has been set up may still be able to do things, such as read things shared with "All users", clone and push to the git repository, or log in to a VM.
+
+{% comment %}
+Maybe these services should check is_active.
+
+I believe that when this was originally designed, being able to access git and VM required an ssh key, and an inactive user could not register an ssh key because that required creating a record.  However, it is now possible to authenticate to shell VMs and http+git with just an API token.
+{% endcomment %}
+
+At this point, there are two ways a user can be activated.
+
+# An admin can set the @is_active@ field directly.  This runs @setup_on_activate@, which sets up oid_login_perm and group membership, but does not set up a repository or VM (even if @auto_setup_new_users_with_repository@ and/or @auto_setup_new_users_with_vm_uuid@ are set).
+# Self-activation using the @activate@ method of the users controller.
+
+h3. User agreements
+
+The @activate@ method of the users controller checks if the user @is_invited@ and whether the user has "signed" all the user agreements.
+
+@is_invited@ is true if any of these are true:
+* @is_active@ is true
+* @new_users_are_active@ is true
+* the user account has a permission link to read the system "all users" group.
+
+User agreements are accessed by getting a listing on the @user_agreements@ endpoint.  This returns a list of collection uuids.  This is executed as a system user, so it bypasses normal read permission checks.
+
+The available user agreements are represented in the Links table as
+
+<pre>
+{
+  "link_class": "signature",
+  "name": "require",
+  "tail_uuid": "*system user uuid*",
+  "head_uuid: "*collection uuid*"
+}
+</pre>
+
+The collection contains the user agreement text file.
+
+Workbench checks @is_invited@.  If true, it displays the clickthrough agreements which the user can "sign".  If @is_invited@ is false, the user ends up at the "inactive user" page.
+
+The @user_agreements/sign@ endpoint creates a Link object:
+
+<pre>
+{
+  "link_class": "signature"
+  "name": "click",
+  "tail_uuid": "*user uuid*",
+  "head_uuid: "*collection uuid*"
+}
+</pre>
+
+This is executed as a system user, so it bypasses the restriction that inactive users cannot create objects.
+
+The @user_agreements/signatures@ endpoint returns the list of Link objects that represent signatures by the current user (created by @sign@).
+
+h3. User profile
+
+Workbench checks the user profile after checking whether user agreements need to be signed.  The requirement to fill out the user profile is not enforced by the API server.
+
+h3(#pre-activated). Pre-activate user by email address
+
+You may create a user account for a user that has not yet logged in, and identify the user by email address.
+
+1. As an admin, create a user object:
+
+<pre>
+{
+  "email": "foo@example.com",
+  "username": "barney",
+  "is_active": true
+}
+</pre>
+
+2. Create a link object, where @tail_uuid@ is the user's email address, @head_uuid@ is the user object created in the previous step, and @xxxxx@ is the value of @uuid_prefix@ of the SSO server.
+
+<pre>
+{
+  "link_class": "permission",
+  "name": "can_login",
+  "tail_uuid": "email address",
+  "head_uuid: "user uuid",
+  "properties": {
+    "identity_url_prefix": "xxxxx-tpzed-"
+  }
+}
+</pre>
+
+3. When the user logs in the first time, the email address will be recognized and the user will be associated with the linked user object.
+
+h3. Pre-activate federated user
+
+1. As admin, create a user object with the @uuid@ of the federated user (this is the user's uuid on their home cluster):
+
+<pre>
+{
+  "uuid": "home1-tpzed-000000000000000",
+  "email": "foo@example.com",
+  "username": "barney",
+  "is_active": true
+}
+</pre>
+
+2. When the user logs in, they will be associated with the existing user object.
+
+h3. Auto-activate federated users from trusted clusters
+
+In the API server config, configure @auto_activate_users_from@ with a list of one or more five-character cluster ids.  A federated user from one of the listed clusters who is active (@is_active@ is true) on the home cluster will be automatically set up and activated on this cluster.
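+
+For example, the API server configuration might include (the cluster ids shown are illustrative):
+
+<pre>
+# illustrative cluster ids
+auto_activate_users_from: [home1, home2]
+</pre>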
+
+h3(#deactivating_users). Deactivating users
+
+Setting @is_active@ to false is not sufficient to lock out a user.  The user can call @activate@ to become active again.  Instead, use @unsetup@, which does the following (see the example after this list):
+
+* Delete oid_login_perms
+* Delete git repository permission links
+* Delete VM login permission links
+* Remove from "All users" group
+* Delete any "signatures"
+* Clear preferences / profile
+* Mark as inactive
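+
+A hypothetical invocation, assuming the @arv@ CLI exposes the users controller's @unsetup@ method and accepts the user's uuid as a parameter:
+
+<pre>
+# hypothetical example; the uuid is a placeholder
+arv user unsetup --uuid zzzzz-tpzed-xxxxxxxxxxxxxxx
+</pre>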
+
+{% comment %}
+Does not revoke @is_admin@, so you can't unsetup an admin unless you turn admin off first.
+
+"inactive" does not prevent user from reading things they previously had access to.
+
+Does not revoke API tokens.
+{% endcomment %}
+
+h3. Activation flows
+
+h4. Private instance
+
+Policy: users must be manually approved.
+
+<pre>
+auto_setup_new_users: false
+new_users_are_active: false
+</pre>
+
+# User is created.  Not set up.  @is_active@ is false.
+# Workbench checks @is_invited@ and finds it is false.  User gets "inactive user" page.
+# Admin goes to the user page and clicks either "setup user" or manually sets @is_active@ to true.
+# Clicking "setup user" sets up the user.  This includes adding the user to "All users" which qualifies the user as @is_invited@.
+# On refreshing workbench, the user is still inactive, but is able to self-activate after signing clickthrough agreements (if any).
+# Alternatively, directly setting @is_active@ to true also sets up the user, but workbench won't display clickthrough agreements (because the user is already active).
+
+h4. Federated instance
+
+Policy: users from other clusters in the federation are activated automatically; users from outside the federation must be manually approved.
+
+<pre>
+auto_setup_new_users: false
+new_users_are_active: false
+auto_activate_users_from: [home1]
+</pre>
+
+# Federated user arrives claiming to be from cluster 'home1'
+# API server authenticates user as being from cluster 'home1'
+# Because 'home1' is in @auto_activate_users_from@ the user is set up and activated.
+# User can immediately start using workbench.
+
+h4. Open instance
+
+Policy: anybody who shows up and signs the agreements is activated.
+
+<pre>
+auto_setup_new_users: true
+new_users_are_active: false
+</pre>
+
+# User is created and auto-setup.  At this point, @is_active@ is false, but the user has been added to the "All users" group.
+# Workbench checks @is_invited@ and finds it is true, because the user is a member of "All users" group.
+# Workbench presents user with list of user agreements, user reads and clicks "sign" for each one.
+# Workbench tries to activate user.
+# User is activated.
+
+h4. Developer instance
+
+Policy: avoid wasting developers' time during development/testing.
+
+<pre>
+auto_setup_new_users: true
+new_users_are_active: true
+</pre>
+
+# User is created, immediately auto-setup, and auto-activated.
+# User can immediately start using workbench.
diff --git a/doc/admin/spot-instances.html.textile.liquid b/doc/admin/spot-instances.html.textile.liquid
new file mode 100644 (file)
index 0000000..1c61b60
--- /dev/null
@@ -0,0 +1,78 @@
+---
+layout: default
+navsection: admin
+title: Using AWS Spot instances
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+This page describes how to set up the system to take advantage of "Amazon's EC2 spot instances":https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-spot-instances.html.
+
+h3. Nodemanager
+
+Nodemanager's configuration should define cloud node sizes that include the @preemptible@ boolean parameter. For example, for every on-demand cloud node size, you could create a @.spot@ variant, like this:
+
+<pre>
+[Size m4.large]
+cores = 2
+scratch = 32000
+
+[Size m4.large.spot]
+cores = 2
+instance_type = m4.large
+preemptible = true
+scratch = 32000
+</pre>
+
+h3. Slurm dispatcher
+
+The @crunch-dispatch-slurm@ service needs a matching instance type configuration in @/etc/arvados/config.yml@, following the previous example:
+
+<pre>
+Clusters:
+  uuid_prefix:
+    InstanceTypes:
+    - Name: m4.large
+      VCPUs: 2
+      RAM: 7782000000
+      Scratch: 32000000000
+      Price: 0.1
+    - Name: m4.large.spot
+      Preemptible: true
+      VCPUs: 2
+      RAM: 7782000000
+      Scratch: 32000000000
+      Price: 0.1
+</pre>
+
+@InstanceType@ names should match those defined in nodemanager's config file, because it is @crunch-dispatch-slurm@'s job to select the instance type and communicate the decision to @nodemanager@ via Slurm.
+
+h3. API Server
+
+Container requests will need the @preemptible@ scheduling parameter included to make the dispatcher request a spot instance. The API Server configuration file includes an option that, when active, will automatically assign the @preemptible@ parameter to any new child container request that doesn't already have it. To activate this feature, the following should be added to the @application.yml@ file:
+
+<pre>
+preemptible_instances: true
+</pre>
+
+With this configuration active, child container requests should include the @preemptible = false@ parameter at creation time to avoid being scheduled for spot instance usage.
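+
+For example (an illustrative snippet, not a complete request body), a child container request that must stay on an on-demand node would be created with:
+
+<pre>
+"scheduling_parameters": {
+  "preemptible": false
+}
+</pre>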
+
+h3. AWS Permissions
+
+When requesting spot instances, Amazon's API may return an authorization error depending on how users and permissions are set on the account. If this is the case, check nodemanager's log for:
+
+<pre>
+BaseHTTPError: AuthFailure.ServiceLinkedRoleCreationNotPermitted: The provided credentials do not have permission to create the service-linked role for EC2 Spot Instances.
+</pre>
+
+The account needs to have a service-linked role created. This can be done by logging into the AWS account, going to _IAM Management_ &rarr; _Roles_, and creating the @AWSServiceRoleForEC2Spot@ role: click the @Create@ button, then select the @EC2@ service and the @EC2 - Spot Instances@ use case.
+
+h3. Cost Tracking
+
+Amazon's Spot instance prices are declared at instance request time and defined by the maximum price that the user is willing to pay per hour. By default, this price is the same amount as the on-demand version of each instance type, and this is the setting nodemanager currently uses, since it doesn't include any pricing data in the spot instance request.
+
+The real price that a spot instance has at any point in time is discovered at the end of each usage hour, depending on instance demand. For this reason, AWS provides a data feed subscription to get hourly logs, as described on "Amazon's User Guide":https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-data-feeds.html.
\ No newline at end of file
index d4ef5ebb78c434312c0b83a31585d07706781e84..e87bc51ad4a590b4102fd4f1047c9b878de466a2 100644 (file)
@@ -50,7 +50,7 @@ table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |_. Example |
 {background:#ccffcc}.|uuid|string|The UUID of the group in question.|path||
 |limit|integer (default 100)|Maximum number of items to return.|query||
-|order|string|Order in which to return matching items.  Sort within a resource type by prefixing the attribute with the resource name and a dot.|query|@"collections.modified_at desc"@|
+|order|array|Attributes to use as sort keys to determine the order in which resources are returned, each optionally followed by @asc@ or @desc@ to indicate ascending or descending order. Sort within a resource type by prefixing the attribute with the resource name and a period.|query|@["collections.modified_at desc"]@|
 |filters|array|Conditions for filtering items.|query|@[["uuid", "is_a", "arvados#job"]]@|
 |recursive|boolean (default false)|Include items owned by subprojects.|query|@true@|
 
index 922df5ab9df5f95dbdfb2a189451d322c2e78d2f..3437003a1874dfef212c66a38a42b28999147686 100644 (file)
@@ -25,6 +25,10 @@ Browser based applications can perform log in via the following highlevel flow:
 
 The "browser authentication process is documented in detail on the Arvados wiki.":https://dev.arvados.org/projects/arvados/wiki/Workbench_authentication_process
 
+h2. User activation
+
+"Creation and activation of new users is described here.":{{site.baseurl}}/admin/activation.html
+
 h2. Creating tokens via the API
 
 The browser login method above issues a new token.  Using that token, it is possible to make API calls to create additional tokens.  To do so, use the @create@ method of the "API client authorizations":{{site.baseurl}}/api/methods/api_client_authorizations.html resource.
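+
+A hypothetical invocation with the @arv@ CLI, assuming it exposes the generated @create@ method for this resource, might look like:
+
+<pre>
+# hypothetical example; an empty object requests a token with default attributes
+arv api_client_authorization create --api-client-authorization '{}'
+</pre>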
diff --git a/doc/install/arvados-on-kubernetes-GKE.html.textile.liquid b/doc/install/arvados-on-kubernetes-GKE.html.textile.liquid
new file mode 100644 (file)
index 0000000..88b2d57
--- /dev/null
@@ -0,0 +1,62 @@
+---
+layout: default
+navsection: installguide
+title: Arvados on Kubernetes - Google Kubernetes Engine
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+This page documents the setup of the prerequisites to run the "Arvados on Kubernetes":/install/arvados-on-kubernetes.html @Helm@ chart on @Google Kubernetes Engine@ (GKE).
+
+h3. Install tooling
+
+Install @gcloud@:
+
+* Follow the instructions at "https://cloud.google.com/sdk/downloads":https://cloud.google.com/sdk/downloads
+
+Install @kubectl@:
+
+<pre>
+$ gcloud components install kubectl
+</pre>
+
+Install @helm@:
+
+* Follow the instructions at "https://docs.helm.sh/using_helm/#installing-helm":https://docs.helm.sh/using_helm/#installing-helm
+
+h3. Boot the GKE cluster
+
+This can be done via the "cloud console":https://console.cloud.google.com/kubernetes/ or via the command line:
+
+<pre>
+$ gcloud container clusters create <CLUSTERNAME> --zone us-central1-a --machine-type n1-standard-2 --cluster-version 1.10
+</pre>
+
+It takes a few minutes for the cluster to be initialized.
+
+h3. Reserve a static IP
+
+Reserve a "static IP":https://console.cloud.google.com/networking/addresses in GCE. Make sure the IP is in the same region as your GKE cluster, and is of the "Regional" type.
+
+h3. Connect to the GKE cluster
+
+Via the web:
+* Click the "Connect" button next to your "GKE cluster":https://console.cloud.google.com/kubernetes/.
+* Execute the "Command-line access" command on your development machine.
+
+Alternatively, use this command:
+
+<pre>
+$ gcloud container clusters get-credentials <CLUSTERNAME> --zone us-central1-a --project <YOUR-PROJECT>
+</pre>
+
+Test the connection:
+
+<pre>
+$ kubectl get nodes
+</pre>
+
+Now proceed to the "Initialize helm on the Kubernetes cluster":/install/arvados-on-kubernetes.html#helm section.
diff --git a/doc/install/arvados-on-kubernetes-minikube.html.textile.liquid b/doc/install/arvados-on-kubernetes-minikube.html.textile.liquid
new file mode 100644 (file)
index 0000000..132b443
--- /dev/null
@@ -0,0 +1,34 @@
+---
+layout: default
+navsection: installguide
+title: Arvados on Kubernetes - Minikube
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+This page documents the setup of the prerequisites to run the "Arvados on Kubernetes":/install/arvados-on-kubernetes.html @Helm@ chart on @Minikube@.
+
+h3. Install tooling
+
+Install @kubectl@:
+
+* Follow the instructions at "https://kubernetes.io/docs/tasks/tools/install-kubectl/":https://kubernetes.io/docs/tasks/tools/install-kubectl/
+
+Install @helm@:
+
+* Follow the instructions at "https://docs.helm.sh/using_helm/#installing-helm":https://docs.helm.sh/using_helm/#installing-helm
+
+h3. Install Minikube
+
+Follow the instructions at "https://kubernetes.io/docs/setup/minikube/":https://kubernetes.io/docs/setup/minikube/
+
+Test the connection:
+
+<pre>
+$ kubectl get nodes
+</pre>
+
+Now proceed to the "Initialize helm on the Kubernetes cluster":/install/arvados-on-kubernetes.html#helm section.
diff --git a/doc/install/arvados-on-kubernetes.html.textile.liquid b/doc/install/arvados-on-kubernetes.html.textile.liquid
new file mode 100644 (file)
index 0000000..01999f0
--- /dev/null
@@ -0,0 +1,133 @@
+---
+layout: default
+navsection: installguide
+title: Arvados on Kubernetes
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados on Kubernetes is implemented as a Helm Chart.
+
+{% include 'notebox_begin_warning' %}
+This Helm Chart does not retain any state after it is deleted. An Arvados cluster created with this Helm Chart is entirely ephemeral, and all data stored on the cluster will be deleted when it is shut down. This will be fixed in a future version.
+{% include 'notebox_end' %}
+
+h2(#overview). Overview
+
+This Helm Chart provides a basic, small Arvados cluster.
+
+Current limitations, to be addressed in the future:
+
+* An Arvados cluster created with this Helm Chart is entirely ephemeral, and all data stored on the cluster will be deleted when it is shut down.
+* No dynamic scaling of compute nodes (but you can adjust @values.yaml@ and "reload the Helm Chart":#reload)
+* All compute nodes are the same size
+* Compute nodes have no cpu/memory/disk constraints yet
+* No git server
+
+h2. Requirements
+
+* Kubernetes 1.10+ cluster with at least 3 nodes, 2 or more cores per node
+* @kubectl@ and @helm@ installed locally, and able to connect to your Kubernetes cluster
+
+If you do not have a Kubernetes cluster already set up, you can use "Google Kubernetes Engine":/install/arvados-on-kubernetes-GKE.html for multi-node development and testing or "another Kubernetes solution":https://kubernetes.io/docs/setup/pick-right-solution/. Minikube is not supported yet.
+
+h2(#helm). Initialize helm on the Kubernetes cluster
+
+If you already have helm running on the Kubernetes cluster, proceed directly to "Start the Arvados cluster":#Start below.
+
+<pre>
+$ helm init
+$ kubectl create serviceaccount --namespace kube-system tiller
+$ kubectl create clusterrolebinding tiller-cluster-rule --clusterrole=cluster-admin --serviceaccount=kube-system:tiller
+$ kubectl patch deploy --namespace kube-system tiller-deploy -p '{"spec":{"template":{"spec":{"serviceAccount":"tiller"}}}}'
+</pre>
+
+Test @helm@ by running
+
+<pre>
+$ helm ls
+</pre>
+
+There should be no errors. The command will return nothing.
+
+h2(#git). Clone the repository
+
+Clone the repository and navigate to the @arvados-kubernetes/charts/arvados@ directory:
+
+<pre>
+$ git clone https://github.com/curoverse/arvados-kubernetes.git
+$ cd arvados-kubernetes/charts/arvados
+</pre>
+
+h2(#Start). Start the Arvados cluster
+
+Next, determine the IP address that the Arvados cluster will use to expose its API, Workbench, etc. If you want this Arvados cluster to be reachable from places other than the local machine, the IP address will need to be routable as appropriate.
+
+<pre>
+$ ./cert-gen.sh <IP ADDRESS>
+</pre>
+
+The @values.yaml@ file contains a number of variables that can be modified. At a minimum, review and/or modify the values for
+
+<pre>
+  adminUserEmail
+  adminUserPassword
+  superUserSecret
+  anonymousUserSecret
+</pre>
+
+Now start the Arvados cluster:
+
+<pre>
+$ helm install --name arvados . --set externalIP=<IP ADDRESS>
+</pre>
+
+At this point, you can use kubectl to see the Arvados cluster boot:
+
+<pre>
+$ kubectl get pods
+$ kubectl get svc
+</pre>
+
+After a few minutes, you can access Arvados Workbench at the IP address specified
+
+* https://&lt;IP ADDRESS&gt;
+
+with the username and password specified in the @values.yaml@ file.
+
+Alternatively, use the Arvados cli tools or SDKs:
+
+Set the environment variables:
+
+<pre>
+$ export ARVADOS_API_TOKEN=<superUserSecret from values.yaml>
+$ export ARVADOS_API_HOST=<STATIC IP>:444
+$ export ARVADOS_API_HOST_INSECURE=true
+</pre>
+
+Test access with:
+
+<pre>
+$ arv user current
+</pre>
+
+h2(#reload). Reload
+
+If you make changes to the Helm Chart (e.g. to @values.yaml@), you can reload Arvados with
+
+<pre>
+$ helm upgrade arvados .
+</pre>
+
+h2. Shut down
+
+{% include 'notebox_begin_warning' %}
+This Helm Chart does not retain any state after it is deleted. An Arvados cluster created with this Helm Chart is entirely ephemeral, and <strong>all data stored on the Arvados cluster will be deleted</strong> when it is shut down. This will be fixed in a future version.
+{% include 'notebox_end' %}
+
+<pre>
+$ helm del arvados --purge
+</pre>
diff --git a/doc/install/create-standard-objects.html.textile.liquid b/doc/install/create-standard-objects.html.textile.liquid
deleted file mode 100644 (file)
index 8ac3fb0..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Create standard objects
-
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-In these steps we use the Arvados CLI tools on the <strong>shell server</strong> to create a few Arvados objects. The CLI tools require an ARVADOS_API_TOKEN environment variable with a valid admin token. If you haven't already done so, set that up as shown in the "API token guide":../user/reference/api-tokens.html.
-
-h3. Arvados repository
-
-Here we create a repository object which will be used to set up a hosted clone of the arvados repository on this cluster.
-
-<notextile>
-<pre><code>~$ <span class="userinput">prefix=`arv --format=uuid user current | cut -d- -f1`</span>
-~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
-~$ <span class="userinput">all_users_group_uuid="$prefix-j7d0g-fffffffffffffff"</span>
-~$ <span class="userinput">repo_uuid=`arv --format=uuid repository create --repository "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"arvados\"}"`</span>
-~$ <span class="userinput">echo "Arvados repository uuid is '$repo_uuid'"</span>
-</code></pre></notextile>
-
-Create a link object to make the repository object readable by the "All users" group, and therefore by every active user. This makes it possible for users to run the bundled Crunch scripts by specifying @"script_version":"master","repository":"arvados"@ rather than pulling the Arvados source tree into their own repositories.
-
-<notextile>
-<pre><code>~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
-<span class="userinput">{
- "tail_uuid":"$all_users_group_uuid",
- "head_uuid":"$repo_uuid",
- "link_class":"permission",
- "name":"can_read"
-}
-EOF</span>
-</code></pre></notextile>
-
-In a couple of minutes, your arvados-git-sync cron job will create an empty repository on your git server. Seed it with the real arvados repository. If your git credential helpers were configured correctly when you "set up your shell server":install-shell-server.html, the "git push" command will use your API token instead of prompting you for a username and password.
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd /tmp</span>
-/tmp$ <span class="userinput">git clone --bare https://github.com/curoverse/arvados.git</span>
-/tmp <span class="userinput">git --git-dir arvados.git push https://git.<b>uuid_prefix.your.domain</b>/arvados.git '*:*'</span>
-</code></pre>
-</notextile>
-
-If you did not set up a HTTPS service, you can push to <code>git@git.uuid_prefix.your.domain:arvados.git</code> using your SSH key, or by logging in to your git server and using sudo.
-
-<notextile>
-<pre><code>gitserver:~$ <span class="userinput">sudo -u git -i bash</span>
-git@gitserver:~$ <span class="userinput">git clone --bare https://github.com/curoverse/arvados.git /tmp/arvados.git</span>
-git@gitserver:~$ <span class="userinput">cd /tmp/arvados.git</span>
-git@gitserver:/tmp/arvados.git$ <span class="userinput">gitolite push /var/lib/arvados/git/repositories/<b>your_arvados_repo_uuid</b>.git '*:*'</span>
-</code></pre>
-</notextile>
-
-h3. Default project for docker images
-
-Here we create a default project for the standard Arvados Docker images, and give all users read access to it. The project is owned by the system user.
-
-<notextile>
-<pre><code>~$ <span class="userinput">project_uuid=`arv --format=uuid group create --group "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"Arvados Standard Docker Images\"}"`</span>
-~$ <span class="userinput">echo "Arvados project uuid is '$project_uuid'"</span>
-~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
-<span class="userinput">{
- "tail_uuid":"$all_users_group_uuid",
- "head_uuid":"$project_uuid",
- "link_class":"permission",
- "name":"can_read"
-}
-EOF</span>
-</code></pre></notextile>
-
-h3. Download and tag the latest arvados/jobs docker image
-
-The @arvados-cwl-runner@ needs access to an arvados/jobs image that is tagged as 'latest'. The following command downloads the latest arvados/jobs image from Docker Hub, loads it into Keep, and tags it as 'latest'.
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv-keepdocker --pull arvados/jobs latest</span>
-</code></pre></notextile>
-
-If the image needs to be downloaded from Docker Hub, the command can take a few minutes to complete, depending on available network bandwidth.
index 1313ac190d8f9a9884786dbe0ea3a85dafceb409..4b3f4ec0b01fe016def2d2dbaf7e92e95b04787f 100644 (file)
@@ -63,7 +63,7 @@ Edit @/etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml@ to authentic
 
 This is the only configuration required by crunch-dispatch-slurm.  The subsections below describe optional configuration flags you can set inside the main configuration object.
 
-h3. Client::KeepServiceURIs
+h3(#KeepServiceURIs). Client::KeepServiceURIs
 
 Override Keep service discovery with a predefined list of Keep URIs. This can be useful if the compute nodes run a local keepstore that should handle all Keep traffic. Example:
 
@@ -76,7 +76,7 @@ Override Keep service discovery with a predefined list of Keep URIs. This can be
 </code></pre>
 </notextile>
 
-h3. PollPeriod
+h3(#PollPeriod). PollPeriod
 
 crunch-dispatch-slurm polls the API server periodically for new containers to run.  The @PollPeriod@ option controls how often this poll happens.  Set this to a string of numbers suffixed with one of the time units @ns@, @us@, @ms@, @s@, @m@, or @h@.  For example:
 
@@ -85,7 +85,7 @@ crunch-dispatch-slurm polls the API server periodically for new containers to ru
 </code></pre>
 </notextile>
 
-h3. PrioritySpread
+h3(#PrioritySpread). PrioritySpread
 
 crunch-dispatch-slurm adjusts the "nice" values of its SLURM jobs to ensure containers are prioritized correctly relative to one another. This option tunes the adjustment mechanism.
 * If non-Arvados jobs run on your SLURM cluster, and your Arvados containers are waiting too long in the SLURM queue because their "nice" values are too high for them to compete with other SLURM jobs, you should use a smaller PrioritySpread value.
@@ -99,11 +99,9 @@ The smallest usable value is @1@. The default value of @10@ is used if this opti
 </code></pre>
 </notextile>
 
+h3(#SbatchArguments). SbatchArguments
 
-
-h3. SbatchArguments
-
-When crunch-dispatch-slurm invokes @sbatch@, you can add switches to the command by specifying @SbatchArguments@.  You can use this to send the jobs to specific cluster partitions or add resource requests.  Set @SbatchArguments@ to an array of strings.  For example:
+When crunch-dispatch-slurm invokes @sbatch@, you can add arguments to the command by specifying @SbatchArguments@.  You can use this to send the jobs to specific cluster partitions or add resource requests.  Set @SbatchArguments@ to an array of strings.  For example:
 
 <notextile>
 <pre><code class="userinput">SbatchArguments:
@@ -111,7 +109,9 @@ When crunch-dispatch-slurm invokes @sbatch@, you can add switches to the command
 </code></pre>
 </notextile>
 
-h3. CrunchRunCommand: Dispatch to SLURM cgroups
+Note: If an argument is supplied multiple times, @slurm@ uses the value of the last occurrence of the argument on the command line.  Arguments specified through Arvados are added after the arguments listed in SbatchArguments.  This means, for example, that an Arvados container that specifies @partitions@ in @scheduling_parameters@ will override an occurrence of @--partition@ in SbatchArguments.  As a result, for container parameters that can be specified through Arvados, SbatchArguments can be used to specify defaults but not to enforce specific policy.
+
+h3(#CrunchRunCommand-cgroups). CrunchRunCommand: Dispatch to SLURM cgroups
 
 If your SLURM cluster uses the @task/cgroup@ TaskPlugin, you can configure Crunch's Docker containers to be dispatched inside SLURM's cgroups.  This provides consistent enforcement of resource constraints.  To do this, use a crunch-dispatch-slurm configuration like the following:
 
@@ -122,7 +122,7 @@ If your SLURM cluster uses the @task/cgroup@ TaskPlugin, you can configure Crunc
 </code></pre>
 </notextile>
 
-The choice of subsystem ("memory" in this example) must correspond to one of the resource types enabled in SLURM's @cgroup.conf@. Limits for other resource types will also be respected.  The specified subsystem is singled out only to let Crunch determine the name of the cgroup provided by SLURM.
+The choice of subsystem ("memory" in this example) must correspond to one of the resource types enabled in SLURM's @cgroup.conf@. Limits for other resource types will also be respected.  The specified subsystem is singled out only to let Crunch determine the name of the cgroup provided by SLURM.  When doing this, you should also set "ReserveExtraRAM":#ReserveExtraRAM .
 
 {% include 'notebox_begin' %}
 
@@ -132,7 +132,7 @@ You can work around this issue by disabling the Docker daemon's systemd integrat
 
 {% include 'notebox_end' %}
 
-h3. CrunchRunCommand: Using host networking for containers
+h3(#CrunchRunCommand-network). CrunchRunCommand: Using host networking for containers
 
 Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups.  This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net".  If you are experiencing this problem, as a workaround you can disable use of network namespaces by Docker across the cluster.  Be aware this reduces container isolation, which may be a security risk.
 
@@ -144,7 +144,7 @@ Older Linux kernels (prior to 3.18) have bugs in network namespace handling whic
 </code></pre>
 </notextile>
 
-h3. MinRetryPeriod: Rate-limit repeated attempts to start containers
+h3(#MinRetryPeriod). MinRetryPeriod: Rate-limit repeated attempts to start containers
 
 If SLURM is unable to run a container, the dispatcher will submit it again after the next PollPeriod. If PollPeriod is very short, this can be excessive. If MinRetryPeriod is set, the dispatcher will avoid submitting the same container to SLURM more than once in the given time span.
 
@@ -153,6 +153,15 @@ If SLURM is unable to run a container, the dispatcher will submit it again after
 </code></pre>
 </notextile>
 
+h3(#ReserveExtraRAM). ReserveExtraRAM: Extra RAM for jobs
+
+Extra RAM to reserve (in bytes) on each SLURM job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@.  If not provided, the default value is zero.  Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process.  In this situation, at least 256MiB is recommended to accommodate each container's @crunch-run@ and @arv-mount@ processes.
+
+<notextile>
+<pre><code class="userinput">ReserveExtraRAM: <b>268435456</b>
+</code></pre>
+</notextile>
+
 h2. Restart the dispatcher
 
 {% include 'notebox_begin' %}
index c69d18b8e4bd2b0b8e3a19802982fdc284eb0e42..e1593a430a9f89b369e1c67e73f41a6705aa6ce4 100644 (file)
@@ -9,8 +9,6 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-h2(#slurm). Set up SLURM
-
 On the API server, install SLURM and munge, and generate a munge key.
 
 On Debian-based systems:
index a9b2971087ea46ceb4cf71afd11f40bd36c58159..216810de47174a32d5e913c5faebda29ee90a8b8 100644 (file)
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: installguide
-title: Installation overview
+title: Installation options
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,9 +9,21 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-Arvados components run on GNU/Linux systems, and do not depend on any particular cloud operating stack.  Arvados supports Debian and derivatives such as Ubuntu, as well as Red Hat and derivatives such as CentOS.
+Arvados components run on GNU/Linux systems and support multiple cloud operating stacks.  Arvados supports Debian and derivatives such as Ubuntu, as well as Red Hat and derivatives such as CentOS.
 
-Arvados components can be installed and configured in a number of different ways.  Step-by-step instructions are available to perform a production installation from packages with manual configuration.  This method assumes you have several (virtual) machines at your disposal for running the various Arvados components.
+Arvados components can be installed and configured in a number of different ways.
 
-* "Docker quick start":arvbox.html
-* "Manual installation":install-manual-prerequisites.html
+<div class="offset1">
+table(table table-bordered table-condensed).
+|||\5=. Appropriate for|
+||_. Ease of setup|_. Multiuser/networked access|_. Workflow Development and Testing|_. Large Scale Production|_. Development of Arvados|_. Arvados System Testing|
+|"Arvados-in-a-box":arvbox.html (arvbox)|Easy|no|yes|no|yes|yes|
+|"Arvados on Kubernetes":arvados-on-kubernetes.html|Easy ^1^|yes|yes ^2^|no ^2^|no|yes|
+|"Manual installation":install-manual-prerequisites.html|Complicated|yes|yes|yes|no|no|
+|"Cloud demo":https://cloud.curoverse.com by Veritas Genetics|N/A ^3^|yes|yes|no|no|no|
+|"Cluster Operation Subscription":https://curoverse.com/products by Veritas Genetics|N/A ^3^|yes|yes|yes|yes|yes|
+</div>
+
+* ^1^ Assumes a Kubernetes cluster is available
+* ^2^ Arvados on Kubernetes is under development and not yet ready for production use
+* ^3^ No installation necessary; run and managed by Veritas Genetics.
index 2a4d103c7bfd84ea9ecead8515715edd664fcd4d..7f39bf51d2ebafcde977f03890a56d75c49943ee 100644 (file)
@@ -19,6 +19,7 @@ The git hosting setup involves three components.
 It is not strictly necessary to deploy _both_ SSH and HTTPS access, but we recommend deploying both:
 * SSH is a more appropriate way to authenticate from a user's workstation because it does not require managing tokens on the client side;
 * HTTPS is a more appropriate way to authenticate from a shell VM because it does not depend on SSH agent forwarding (SSH clients' agent forwarding features tend to behave as if the remote machine is fully trusted).
+* HTTPS is also used by Arvados Composer to access git repositories from the browser.
 
 The HTTPS instructions given below will not work if you skip the SSH setup steps.
 
@@ -338,3 +339,47 @@ Restart Nginx to make the Nginx and API server configuration changes take effect
 <pre><code>gitserver:~$ <span class="userinput">sudo nginx -s reload</span>
 </code></pre>
 </notextile>
+
+h2. Clone Arvados repository
+
+Here we create a repository object which will be used to set up a hosted clone of the arvados repository on this cluster.
+
+<notextile>
+<pre><code>~$ <span class="userinput">prefix=`arv --format=uuid user current | cut -d- -f1`</span>
+~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
+~$ <span class="userinput">all_users_group_uuid="$prefix-j7d0g-fffffffffffffff"</span>
+~$ <span class="userinput">repo_uuid=`arv --format=uuid repository create --repository "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"arvados\"}"`</span>
+~$ <span class="userinput">echo "Arvados repository uuid is '$repo_uuid'"</span>
+</code></pre></notextile>
+
+Create a link object to make the repository object readable by the "All users" group, and therefore by every active user. This makes it possible for users to run the bundled Crunch scripts by specifying @"script_version":"master","repository":"arvados"@ rather than pulling the Arvados source tree into their own repositories.
+
+<notextile>
+<pre><code>~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
+<span class="userinput">{
+ "tail_uuid":"$all_users_group_uuid",
+ "head_uuid":"$repo_uuid",
+ "link_class":"permission",
+ "name":"can_read"
+}
+EOF</span>
+</code></pre></notextile>
+
+In a couple of minutes, your arvados-git-sync cron job will create an empty repository on your git server. Seed it with the real arvados repository. If your git credential helpers were configured correctly when you "set up your shell server":install-shell-server.html, the "git push" command will use your API token instead of prompting you for a username and password.
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd /tmp</span>
+/tmp$ <span class="userinput">git clone --bare https://github.com/curoverse/arvados.git</span>
+/tmp$ <span class="userinput">git --git-dir arvados.git push https://git.<b>uuid_prefix.your.domain</b>/arvados.git '*:*'</span>
+</code></pre>
+</notextile>
+
+If you did not set up an HTTPS service, you can push to <code>git@git.uuid_prefix.your.domain:arvados.git</code> using your SSH key, or by logging in to your git server and using sudo.
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo -u git -i bash</span>
+git@gitserver:~$ <span class="userinput">git clone --bare https://github.com/curoverse/arvados.git /tmp/arvados.git</span>
+git@gitserver:~$ <span class="userinput">cd /tmp/arvados.git</span>
+git@gitserver:/tmp/arvados.git$ <span class="userinput">gitolite push /var/lib/arvados/git/repositories/<b>your_arvados_repo_uuid</b>.git '*:*'</span>
+</code></pre>
+</notextile>
diff --git a/doc/install/install-components.html.textile.liquid b/doc/install/install-components.html.textile.liquid
new file mode 100644 (file)
index 0000000..b21c4bd
--- /dev/null
@@ -0,0 +1,28 @@
+---
+layout: default
+navsection: installguide
+title: Choosing which components to install
+...
+
+Arvados consists of many components, some of which may be omitted (at the cost of reduced functionality).  It may also be helpful to review the "Arvados Architecture":{{site.baseurl}}/architecture to understand how these components interact.
+
+table(table table-bordered table-condensed).
+|\3=. *Core*|
+|"Postgres database":install-postgresql.html |Stores data for the API server.|Required.|
+|"API server":install-api-server.html |Core Arvados logic for managing users, groups, collections, containers, and enforcing permissions.|Required.|
+|\3=. *Keep (storage)*|
+|"Keepstore":install-keepstore.html |Stores content-addressed blocks in a variety of backends (local filesystem, cloud object storage).|Required.|
+|"Keepproxy":install-keepproxy.html |Gateway service to access keep servers from external networks.|Required to be able to use arv-put, arv-get, or arv-mount outside the private Arvados network.|
+|"Keep-web":install-keep-web.html |Gateway service providing read/write HTTP and WebDAV support on top of Keep.|Required to be able to download files from Keep over plain HTTP in Workbench.|
+|"Keep-balance":install-keep-balance.html |Storage cluster maintenance daemon responsible for moving blocks to their optimal server location, adjusting block replication levels, and trashing unreferenced blocks.|Required to free deleted data from underlying storage, and to ensure proper replication and block distribution (including support for storage classes).|
+|\3=. *User interface*|
+|"Single Sign On server":install-sso.html |Login server.|Required for web based login to Workbench.|
+|"Workbench":install-workbench-app.html |Primary graphical user interface for working with file collections and running containers.|Optional.  Depends on API server, SSO server, keep-web, websockets server.|
+|"Workflow Composer":install-composer.html |Graphical user interface for editing Common Workflow Language workflows.|Optional.  Depends on git server (arv-git-httpd).|
+|\3=. *Additional services*|
+|"Websockets server":install-ws.html |Event distribution server.|Required to view streaming container logs in Workbench.|
+|"Shell server":install-shell-server.html |Synchronize (create/delete/configure) Unix shell accounts with Arvados users.|Optional.|
+|"Git server":install-arv-git-httpd.html |Arvados-hosted git repositories, with Arvados-token based authentication.|Optional, but required by Workflow Composer.|
+|\3=. *Crunch (running containers)*|
+|"crunch-dispatch-slurm":crunch2-slurm/install-prerequisites.html |Run analysis workflows using Docker containers distributed across a SLURM cluster.|Optional if you wish to use Arvados for data management only.|
+|"Node Manager":install-nodemanager.html |Allocate and free cloud VM instances on demand based on workload.|Optional, not needed for a static SLURM cluster (such as on-premise HPC).|
diff --git a/doc/install/install-composer.html.textile.liquid b/doc/install/install-composer.html.textile.liquid
new file mode 100644 (file)
index 0000000..9bd25ed
--- /dev/null
@@ -0,0 +1,59 @@
+---
+layout: default
+navsection: installguide
+title: Install Composer
+...
+
+Arvados Composer is a single-page JavaScript application for building Common Workflow Language (CWL) workflows.
+
+h2. Prerequisites
+
+In addition to Arvados core services, Composer requires "Arvados hosted git repositories":install-arv-git-httpd.html, which are used for storing workflow files.
+
+h2. Install
+
+Composer may be installed on the same host as Workbench, or on a different host.  Composer communicates directly with the Arvados API server.  It does not require its own backend; it only needs to be served as static files.
+
+On a Debian-based system, install the following package:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install arvados-composer</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following package:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install arvados-composer</span>
+</code></pre>
+</notextile>
+
+h2. Configure
+
+h3. composer.yml
+
+Edit @/etc/arvados/composer/composer.yml@ and set @apiEndPoint@ to your API server:
+
+<pre>
+apiEndPoint: https://zzzzz.arvadosapi.com
+</pre>
+
+h3. Nginx
+
+Add Composer to your Nginx configuration.  This example will host Composer at @/composer@.
+
+<pre>
+location /composer {
+  root   /var/www/arvados-composer;
+  index  index.html;
+}
+</pre>
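+
+After updating the Nginx configuration, reload it so the new location block takes effect.  (This is the usual reload command; adjust it if your system manages Nginx differently.)
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo nginx -s reload</span>
+</code></pre>
+</notextile>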
+
+h3. Workbench link to composer
+
+Edit the Workbench @application.yml@ and set @composer_url@ to the URL from which Composer is served.
+
+<pre>
+production:
+  composer_url: 'https://workbench.zzzzz.arvadosapi.com/composer'
+</pre>
index 4c735a1eec1ec286b2652f6ee5282920c48cc797..3a8dce078dd092bfe687639f912415b2553bf14c 100644 (file)
@@ -57,12 +57,7 @@ h3. Create a keep-balance token
 
 Create an Arvados superuser token for use by keep-balance. *On the API server*, run:
 
-<notextile>
-<pre><code>apiserver:~$ <span class="userinput">cd /var/www/arvados-api/current</span>
-apiserver:/var/www/arvados-api/current$ <span class="userinput">sudo -u <b>webserver-user</b> RAILS_ENV=production bundle exec script/create_superuser_token.rb</span>
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
-</code></pre>
-</notextile>
+{% include 'create_superuser_token' %}
 
 h3. Update keepstore configuration files
 
index fe690a5eda8880b67f21fca6c2242e8bf62afead..9f580c0f8b2af0f0244c1ae1570c4346d33cd6ac 100644 (file)
@@ -103,7 +103,18 @@ Note: if the Web uploader is failing to upload data and there are no logs from k
 
 h3. Tell the API server about the Keepproxy server
 
-The API server needs to be informed about the presence of your Keepproxy server. Please execute the following commands on your <strong>shell server</strong>.
+The API server needs to be informed about the presence of your Keepproxy server.
+
+First, if you don't already have an admin token, create a superuser token:
+
+{% include 'create_superuser_token' %}
+
+Configure your environment to run @arv@ using the output of create_superuser_token.rb:
+
+<pre>
+export ARVADOS_API_HOST=zzzzz.example.com
+export ARVADOS_API_TOKEN=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+</pre>
 
 <notextile>
 <pre><code>~$ <span class="userinput">uuid_prefix=`arv --format=uuid user current | cut -d- -f1`</span>
@@ -117,3 +128,13 @@ The API server needs to be informed about the presence of your Keepproxy server.
 }
 EOF</span>
 </code></pre></notextile>
+
+h3. Testing keepproxy
+
+Log into a host that is on an external network, outside your private Arvados network.  The host should be able to contact your Keepproxy server (e.g. keep.$uuid_prefix.arvadosapi.com), but not your Keepstore servers (e.g. keep[0-9].$uuid_prefix.arvadosapi.com).
+
+Install the "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html
+
+@ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ must be set in the environment.
+
+You should now be able to use @arv-put@ to upload collections and @arv-get@ to fetch collections.  For an example, see "Testing keep":install-keepstore.html#testing on the keepstore install page; a similar quick check from the external host is sketched below.
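+
+A minimal round-trip check from the external host might look like the following sketch (the collection hash shown is illustrative and will differ on your cluster):
+
+<pre>
+$ echo "hello world!" > hello.txt
+
+$ arv-put --portable-data-hash hello.txt
+59389a8f9ee9d399be35462a0f92541c+53
+
+$ arv-get 59389a8f9ee9d399be35462a0f92541c+53/hello.txt
+hello world!
+</pre>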
index 750b7a47ef184ecaa91536dd098e3b3b0891562b..64a710f9126fe7aa905817b3fb1fae162407a603 100644 (file)
@@ -198,19 +198,52 @@ Repeat the above sections to prepare volumes and bring up supervised services on
 
 h3. Tell the API server about the Keepstore servers
 
-The API server needs to be informed about the presence of your Keepstore servers. For each of the Keepstore servers you have created, please execute the following commands on your <strong>shell server</strong>.
+The API server needs to be informed about the presence of your Keepstore servers.
 
-Make sure to update the @service_host@ value to match each of your Keepstore servers.
+First, if you don't already have an admin token, create a superuser token:
+
+{% include 'create_superuser_token' %}
+
+Configure your environment to run @arv@ using the output of create_superuser_token.rb:
+
+<pre>
+export ARVADOS_API_HOST=zzzzz.example.com
+export ARVADOS_API_TOKEN=zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+</pre>
+
+Use this command to register each Keepstore server you have installed.  Make sure to update the @service_host@ value.
 
 <notextile>
 <pre><code>~$ <span class="userinput">prefix=`arv --format=uuid user current | cut -d- -f1`</span>
 ~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
 ~$ <span class="userinput">read -rd $'\000' keepservice &lt;&lt;EOF; arv keep_service create --keep-service "$keepservice"</span>
 <span class="userinput">{
- "service_host":"<strong>keep0.$prefix.your.domain</strong>",
+ "service_host":"<strong>keep0.$uuid_prefix.your.domain</strong>",
  "service_port":25107,
  "service_ssl_flag":false,
  "service_type":"disk"
 }
 EOF</span>
 </code></pre></notextile>
+
+h3(#testing). Testing keep
+
+Install the "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html
+
+@ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ must be set in the environment.
+
+You should now be able to use @arv-put@ to upload collections and @arv-get@ to fetch collections:
+
+<pre>
+$ echo "hello world!" > hello.txt
+
+$ arv-put --portable-data-hash hello.txt
+2018-07-12 13:35:25 arvados.arv_put[28702] INFO: Creating new cache file at /home/example/.cache/arvados/arv-put/1571ec0adb397c6a18d5c74cc95b3a2a
+0M / 0M 100.0% 2018-07-12 13:35:27 arvados.arv_put[28702] INFO:
+
+2018-07-12 13:35:27 arvados.arv_put[28702] INFO: Collection saved as 'Saved at 2018-07-12 17:35:25 UTC by example@example'
+59389a8f9ee9d399be35462a0f92541c+53
+
+$ arv-get 59389a8f9ee9d399be35462a0f92541c+53/hello.txt
+hello world!
+</pre>
index 7b1b24e1445d59ac15d1205988d06814eab950eb..e0cc4b8581e65a1a38292f1953418db394f92bee 100644 (file)
@@ -19,7 +19,7 @@ This guide assumes you have seven systems available in the same network subnet:
 
 <div class="offset1">
 table(table table-bordered table-condensed).
-|_Function_|_Number of nodes_|
+|_. Function|_. Number of nodes|
 |Arvados API, Crunch dispatcher, Git, Websockets and Workbench|1|
 |Arvados Compute node|1|
 |Arvados Keepproxy and Keep-web server|1|
@@ -33,7 +33,7 @@ The number of Keepstore, shell and compute nodes listed above is a minimum. In a
 h2. Supported GNU/Linux distributions
 
 table(table table-bordered table-condensed).
-|_Distribution_|_State_|_Last supported version_|
+|_. Distribution|_. State|_. Last supported version|
 |CentOS 7|Supported|Latest|
 |Debian 8 ("jessie")|Supported|Latest|
 |Debian 9 ("stretch")|Supported|Latest|
@@ -73,7 +73,7 @@ First, register the Curoverse signing key in apt's database:
 Configure apt to retrieve packages from the Arvados package repository. This command depends on your OS vendor and version:
 
 table(table table-bordered table-condensed).
-|OS version|Command|
+|_. OS version|_. Command|
 |Debian 8 ("jessie")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ jessie main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Debian 9 ("stretch")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ stretch main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 14.04 ("trusty")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
@@ -128,7 +128,7 @@ By convention, we use the following hostname pattern:
 
 <div class="offset1">
 table(table table-bordered table-condensed).
-|_Function_|_Hostname_|
+|_. Function|_. Hostname|
 |Arvados API|@uuid_prefix@.your.domain|
 |Arvados Git server|git.@uuid_prefix@.your.domain|
 |Arvados Keepproxy server|keep.@uuid_prefix@.your.domain|
index 9ee6722a07fa8b71be4b120e431b49f95635d881..defec2589e82a3f32266f39e500c54401ee57683 100644 (file)
@@ -11,7 +11,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 Arvados Node Manager provides elastic computing for Arvados and SLURM by creating and destroying virtual machines on demand.  Node Manager currently supports Amazon Web Services (AWS), Google Cloud Platform (GCP) and Microsoft Azure.
 
-Note: node manager is only required for elastic computing cloud environments.  Fixed size clusters do not require node manager.
+Note: node manager is only required for elastic computing cloud environments.  Fixed-size clusters (such as on-premise HPC) do not require node manager.
 
 h2. Install
 
@@ -113,6 +113,15 @@ boot_fail_after = 1800
 # an Arvados node that hasn't been updated for this long.
 node_stale_after = 14400
 
+# Number of consecutive times a node must report as "idle" before it
+# will be considered eligible for shutdown.  Node status is checked
+# each poll period, and a node can go idle at any point during a poll
+# period (meaning a node could be reported as idle when it has only been
+# idle for 1 second).  With a 60 second poll period, three consecutive
+# status updates of "idle" suggest the node has been idle at least
+# 121 seconds.
+consecutive_idle_count = 3
+
 # Scaling factor to be applied to nodes' available RAM size. Usually there's a
 # variable discrepancy between the advertised RAM value on cloud nodes and the
 # actual amount available.
@@ -282,6 +291,15 @@ poll_stale_after = 600
 # an Arvados node that hasn't been updated for this long.
 node_stale_after = 14400
 
+# Number of consecutive times a node must report as "idle" before it
+# will be considered eligible for shutdown.  Node status is checked
+# each poll period, and a node can go idle at any point during a poll
+# period (meaning a node could be reported as idle when it has only been
+# idle for 1 second).  With a 60 second poll period, three consecutive
+# status updates of "idle" suggest the node has been idle at least
+# 121 seconds.
+consecutive_idle_count = 3
+
 # Scaling factor to be applied to nodes' available RAM size. Usually there's a
 # variable discrepancy between the advertised RAM value on cloud nodes and the
 # actual amount available.
@@ -470,6 +488,15 @@ boot_fail_after = 1800
 # an Arvados node that hasn't been updated for this long.
 node_stale_after = 14400
 
+# Number of consecutive times a node must report as "idle" before it
+# will be considered eligible for shutdown.  Node status is checked
+# each poll period, and a node can go idle at any point during a poll
+# period (meaning a node could be reported as idle when it has only been
+# idle for 1 second).  With a 60 second poll period, three consecutive
+# status updates of "idle" suggest the node has been idle at least
+# 121 seconds.
+consecutive_idle_count = 3
+
 # Scaling factor to be applied to nodes' available RAM size. Usually there's a
 # variable discrepancy between the advertised RAM value on cloud nodes and the
 # actual amount available.
index cf25639b14defda47456d6610458285a06aaecce..f9ecf7a5343b6210ceaf613c796af535a114adb1 100644 (file)
@@ -38,6 +38,11 @@ hints:
     enableReuse: false
   cwltool:Secrets:
     secrets: [input1, input2]
+  cwltool:TimeLimit:
+    timelimit: 14400
+  arv:WorkflowRunnerResources:
+    ramMin: 2048
+    coresMin: 2
 </pre>
 
 The one exception to this is @arv:APIRequirement@, see note below.
@@ -111,3 +116,21 @@ Indicate that one or more input parameters are "secret".  Must be applied at the
 table(table table-bordered table-condensed).
 |_. Field |_. Type |_. Description |
 |secrets|array<string>|Input parameters which are considered "secret".  Must be strings.|
+
+
+h2. cwltool:TimeLimit
+
+Set an upper limit on the execution time of a CommandLineTool or ExpressionTool.  A tool execution which exceeds the time limit may be preemptively terminated and considered failed.  May also be used by batch systems to make scheduling decisions.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|timelimit|int|Execution time limit in seconds. If set to zero, no limit is enforced.|
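+
+For example, the following hint (a minimal sketch; the one-hour value is illustrative) limits a tool to 3600 seconds:
+
+<pre>
+hints:
+  cwltool:TimeLimit:
+    timelimit: 3600
+</pre>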
+
+h2. arv:WorkflowRunnerResources
+
+Specify resource requirements for the workflow runner process (arvados-cwl-runner) that manages a workflow run.  Must be applied to the top level workflow.  Will also be set implicitly when using @--submit-runner-ram@ on the command line along with @--create-workflow@ or @--update-workflow@.  Use this to adjust the runner's allocation if the workflow runner is getting "out of memory" exceptions or being killed by the out-of-memory (OOM) killer.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|ramMin|int|RAM, in mebibytes, to reserve for the arvados-cwl-runner process. Default 1 GiB.|
+|coresMin|int|Number of cores to reserve for the arvados-cwl-runner process. Default 1 core.|
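+
+For example, a top-level workflow might reserve extra memory and cores for the runner as in this sketch (the @$namespaces@ entry and the values are illustrative):
+
+<pre>
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+hints:
+  arv:WorkflowRunnerResources:
+    ramMin: 2048
+    coresMin: 2
+</pre>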
index 353167e80080cad13bac71d10eb1d33d3dd7034b..8c65cf7acf1b6dd7bc02660464be06ea07cc3daa 100644 (file)
@@ -52,19 +52,15 @@ func (v Version) RunCommand(prog string, args []string, stdin io.Reader, stdout,
 type Multi map[string]Handler
 
 func (m Multi) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
-       if len(args) < 1 {
-               fmt.Fprintf(stderr, "usage: %s command [args]\n", prog)
-               m.Usage(stderr)
-               return 2
-       }
        _, basename := filepath.Split(prog)
-       if strings.HasPrefix(basename, "arvados-") {
-               basename = basename[8:]
-       } else if strings.HasPrefix(basename, "crunch-") {
-               basename = basename[7:]
-       }
+       basename = strings.TrimPrefix(basename, "arvados-")
+       basename = strings.TrimPrefix(basename, "crunch-")
        if cmd, ok := m[basename]; ok {
                return cmd.RunCommand(prog, args, stdin, stdout, stderr)
+       } else if len(args) < 1 {
+               fmt.Fprintf(stderr, "usage: %s command [args]\n", prog)
+               m.Usage(stderr)
+               return 2
        } else if cmd, ok = m[args[0]]; ok {
                return cmd.RunCommand(prog+" "+args[0], args[1:], stdin, stdout, stderr)
        } else {
index 59c2f2a61d9d534f19cfb45ee2a34a38a4381808..a1a69a88e4ccd1bbb6d4882620581e91b3a03523 100644 (file)
@@ -30,6 +30,20 @@ type Handler struct {
 
 func (h *Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
        h.setupOnce.Do(h.setup)
+       if req.Method != "GET" && req.Method != "HEAD" {
+               // http.ServeMux returns 301 with a cleaned path if
+               // the incoming request has a double slash. Some
+               // clients (including the Go standard library) change
+               // the request method to GET when following a 301
+               // redirect if the original method was not HEAD
+               // (RFC7231 6.4.2 specifically allows this in the case
+               // of POST). Thus "POST //foo" gets misdirected to
+               // "GET /foo". To avoid this, eliminate double slashes
+               // before passing the request to ServeMux.
+               for strings.Contains(req.URL.Path, "//") {
+                       req.URL.Path = strings.Replace(req.URL.Path, "//", "/", -1)
+               }
+       }
        h.handlerStack.ServeHTTP(w, req)
 }
 
@@ -47,6 +61,11 @@ func (h *Handler) setup() {
        })
        mux.Handle("/", http.HandlerFunc(h.proxyRailsAPI))
        h.handlerStack = mux
+
+       // Changing the global isn't the right way to do this, but a
+       // proper solution would conflict with an impending 13493
+       // merge anyway, so this will do for now.
+       arvados.InsecureHTTPClient.CheckRedirect = func(*http.Request, []*http.Request) error { return http.ErrUseLastResponse }
 }
 
 // headers that shouldn't be forwarded when proxying. See
@@ -89,6 +108,9 @@ func (h *Handler) proxyRailsAPI(w http.ResponseWriter, reqIn *http.Request) {
                xff = xffIn + "," + xff
        }
        hdrOut.Set("X-Forwarded-For", xff)
+       if hdrOut.Get("X-Forwarded-Proto") == "" {
+               hdrOut.Set("X-Forwarded-Proto", reqIn.URL.Scheme)
+       }
        hdrOut.Add("Via", reqIn.Proto+" arvados-controller")
 
        ctx := reqIn.Context()
@@ -101,6 +123,7 @@ func (h *Handler) proxyRailsAPI(w http.ResponseWriter, reqIn *http.Request) {
        reqOut := (&http.Request{
                Method: reqIn.Method,
                URL:    urlOut,
+               Host:   reqIn.Host,
                Header: hdrOut,
                Body:   reqIn.Body,
        }).WithContext(ctx)
index 981ad7ab91919c65327e972e6004b0eb15594352..eb947ea363705293679da1edd3430e2d8d5c0657 100644 (file)
@@ -120,3 +120,11 @@ func (s *HandlerSuite) TestProxyNotFound(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(jresp["errors"], check.FitsTypeOf, []interface{}{})
 }
+
+func (s *HandlerSuite) TestProxyRedirect(c *check.C) {
+       req := httptest.NewRequest("GET", "https://example.org:1234/login?return_to=foo", nil)
+       resp := httptest.NewRecorder()
+       s.handler.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusFound)
+       c.Check(resp.Header().Get("Location"), check.Matches, `https://example\.org:1234/auth/joshid\?return_to=foo&?`)
+}
index 4329f4f139e14fc916935a945e2c0935db7f4860..1c36d6cf5bb770cb447b6f7f177d39c5ff7ef469 100644 (file)
@@ -47,20 +47,12 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
        needRAM := ctr.RuntimeConstraints.RAM + ctr.RuntimeConstraints.KeepCacheRAM
        needRAM = (needRAM * 100) / int64(100-discountConfiguredRAMPercent)
 
-       availableTypes := make([]arvados.InstanceType, len(cc.InstanceTypes))
-       copy(availableTypes, cc.InstanceTypes)
-       sort.Slice(availableTypes, func(a, b int) bool {
-               return availableTypes[a].Price < availableTypes[b].Price
-       })
-       err = ConstraintsNotSatisfiableError{
-               errors.New("constraints not satisfiable by any configured instance type"),
-               availableTypes,
-       }
+       ok := false
        for _, it := range cc.InstanceTypes {
                switch {
-               case err == nil && it.Price > best.Price:
-               case it.Scratch < needScratch:
-               case it.RAM < needRAM:
+               case ok && it.Price > best.Price:
+               case int64(it.Scratch) < needScratch:
+               case int64(it.RAM) < needRAM:
                case it.VCPUs < needVCPUs:
                case it.Preemptible != ctr.SchedulingParameters.Preemptible:
                case it.Price == best.Price && (it.RAM < best.RAM || it.VCPUs < best.VCPUs):
@@ -68,8 +60,22 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
                default:
                        // Lower price || (same price && better specs)
                        best = it
-                       err = nil
+                       ok = true
+               }
+       }
+       if !ok {
+               availableTypes := make([]arvados.InstanceType, 0, len(cc.InstanceTypes))
+               for _, t := range cc.InstanceTypes {
+                       availableTypes = append(availableTypes, t)
                }
+               sort.Slice(availableTypes, func(a, b int) bool {
+                       return availableTypes[a].Price < availableTypes[b].Price
+               })
+               err = ConstraintsNotSatisfiableError{
+                       errors.New("constraints not satisfiable by any configured instance type"),
+                       availableTypes,
+               }
+               return
        }
        return
 }
index 1484f07a29c6754f80b1fb88faaebcb59290938b..91c6bb1049fb381d9070e747b1f076eec2f95dbc 100644 (file)
@@ -11,7 +11,7 @@ import (
 
 var _ = check.Suite(&NodeSizeSuite{})
 
-const GiB = int64(1 << 30)
+const GiB = arvados.ByteSize(1 << 30)
 
 type NodeSizeSuite struct{}
 
@@ -27,10 +27,10 @@ func (*NodeSizeSuite) TestChooseNotConfigured(c *check.C) {
 
 func (*NodeSizeSuite) TestChooseUnsatisfiable(c *check.C) {
        checkUnsatisfiable := func(ctr *arvados.Container) {
-               _, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: []arvados.InstanceType{
-                       {Price: 1.1, RAM: 1000000000, VCPUs: 2, Name: "small1"},
-                       {Price: 2.2, RAM: 2000000000, VCPUs: 4, Name: "small2"},
-                       {Price: 4.4, RAM: 4000000000, VCPUs: 8, Name: "small4", Scratch: GiB},
+               _, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: map[string]arvados.InstanceType{
+                       "small1": {Price: 1.1, RAM: 1000000000, VCPUs: 2, Name: "small1"},
+                       "small2": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Name: "small2"},
+                       "small4": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Name: "small4", Scratch: GiB},
                }}, ctr)
                c.Check(err, check.FitsTypeOf, ConstraintsNotSatisfiableError{})
        }
@@ -43,40 +43,40 @@ func (*NodeSizeSuite) TestChooseUnsatisfiable(c *check.C) {
                checkUnsatisfiable(&arvados.Container{RuntimeConstraints: rc})
        }
        checkUnsatisfiable(&arvados.Container{
-               Mounts:             map[string]arvados.Mount{"/tmp": {Kind: "tmp", Capacity: 2 * GiB}},
+               Mounts:             map[string]arvados.Mount{"/tmp": {Kind: "tmp", Capacity: int64(2 * GiB)}},
                RuntimeConstraints: arvados.RuntimeConstraints{RAM: 12345, VCPUs: 1},
        })
 }
 
 func (*NodeSizeSuite) TestChoose(c *check.C) {
-       for _, menu := range [][]arvados.InstanceType{
+       for _, menu := range []map[string]arvados.InstanceType{
                {
-                       {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
-                       {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
-                       {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Name: "small"},
+                       "costly": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
+                       "best":   {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
+                       "small":  {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Name: "small"},
                },
                {
-                       {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
-                       {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "goodenough"},
-                       {Price: 2.2, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
-                       {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Name: "small"},
+                       "costly":     {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
+                       "goodenough": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "goodenough"},
+                       "best":       {Price: 2.2, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
+                       "small":      {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Name: "small"},
                },
                {
-                       {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Name: "small"},
-                       {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "goodenough"},
-                       {Price: 2.2, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
-                       {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
+                       "small":      {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Name: "small"},
+                       "goodenough": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "goodenough"},
+                       "best":       {Price: 2.2, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
+                       "costly":     {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
                },
                {
-                       {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: GiB, Name: "small"},
-                       {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: GiB, Name: "nearly"},
-                       {Price: 3.3, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
-                       {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
+                       "small":  {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: GiB, Name: "small"},
+                       "nearly": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: GiB, Name: "nearly"},
+                       "best":   {Price: 3.3, RAM: 4000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "best"},
+                       "costly": {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Name: "costly"},
                },
        } {
                best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu}, &arvados.Container{
                        Mounts: map[string]arvados.Mount{
-                               "/tmp": {Kind: "tmp", Capacity: 2 * GiB},
+                               "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)},
                        },
                        RuntimeConstraints: arvados.RuntimeConstraints{
                                VCPUs:        2,
@@ -92,16 +92,16 @@ func (*NodeSizeSuite) TestChoose(c *check.C) {
        }
 }
 
-func (*NodeSizeSuite) TestChoosePreemptible(c *check.C) {
-       menu := []arvados.InstanceType{
-               {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Preemptible: true, Name: "costly"},
-               {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "almost best"},
-               {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Preemptible: true, Name: "best"},
-               {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Preemptible: true, Name: "small"},
+func (*NodeSizeSuite) TestChoosePreemptable(c *check.C) {
+       menu := map[string]arvados.InstanceType{
+               "costly":      {Price: 4.4, RAM: 4000000000, VCPUs: 8, Scratch: 2 * GiB, Preemptible: true, Name: "costly"},
+               "almost best": {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Name: "almost best"},
+               "best":        {Price: 2.2, RAM: 2000000000, VCPUs: 4, Scratch: 2 * GiB, Preemptible: true, Name: "best"},
+               "small":       {Price: 1.1, RAM: 1000000000, VCPUs: 2, Scratch: 2 * GiB, Preemptible: true, Name: "small"},
        }
        best, err := ChooseInstanceType(&arvados.Cluster{InstanceTypes: menu}, &arvados.Container{
                Mounts: map[string]arvados.Mount{
-                       "/tmp": {Kind: "tmp", Capacity: 2 * GiB},
+                       "/tmp": {Kind: "tmp", Capacity: 2 * int64(GiB)},
                },
                RuntimeConstraints: arvados.RuntimeConstraints{
                        VCPUs:        2,
diff --git a/sdk/R/R/zzz.R b/sdk/R/R/zzz.R
new file mode 100644 (file)
index 0000000..c98f803
--- /dev/null
@@ -0,0 +1,10 @@
+.onLoad <- function(libName, pkgName)
+{
+    minAllowedRVersion <- "3.3.0"
+    currentRVersion <- getRversion()
+
+    if(currentRVersion < minAllowedRVersion)
+        print(paste0("Minimum R version required to run ", pkgName, " is ",
+                     minAllowedRVersion, ". Your current version is ",
+                     toString(currentRVersion), ". Please update R and try again."))
+}
index dcfa2186e9edba13493919c6e4eb192efa03c544..be34b2fdb1dabd3531ca74e696a8240528418520 100644 (file)
@@ -31,6 +31,8 @@ On Debian, this is:
 apt-get install build-essential libxml2-dev libssl-dev libcurl4-gnutls-dev
 ```
 
+The minimum R version required to run ArvadosR is 3.3.0.
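+
+If you are not sure which R version is installed, you can check from an R session:
+
+```
+getRversion()
+```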
+
 
 ### Usage
 
index fd48b4852df4f1223eb7ce6fc125fc2234e78f6d..f84636e077e2e5785ed9aa4cb58940beba58cbfc 100644 (file)
@@ -38,7 +38,7 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'json', '>= 1.7.7', '<3'
   s.add_runtime_dependency 'trollop', '~> 2.0'
   s.add_runtime_dependency 'andand', '~> 1.3', '>= 1.3.3'
-  s.add_runtime_dependency 'oj', '~> 2.0', '>= 2.0.3'
+  s.add_runtime_dependency 'oj', '~> 3.0'
   s.add_runtime_dependency 'curb', '~> 0.8'
   s.homepage    =
     'https://arvados.org'
index bf419dd9b649e23c19265cfd58db5008eff0caaf..131795ee2c0173703a7385c8676d11536ff17398 100644 (file)
@@ -714,7 +714,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
 
     parser.add_argument("--submit-runner-ram", type=int,
                         help="RAM (in MiB) required for the workflow runner job (default 1024)",
-                        default=1024)
+                        default=None)
 
     parser.add_argument("--submit-runner-image", type=str,
                         help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
@@ -831,6 +831,8 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         if api_client is None:
             api_client = arvados.safeapi.ThreadSafeApiCache(api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4})
             keep_client = api_client.keep
+            # Make an API call now so errors are reported early.
+            api_client.users().current().execute()
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
         runner = ArvCwlRunner(api_client, arvargs, keep_client=keep_client, num_retries=4)
index 2ab96c94f0b3e54b42ae51b9b9f42eca6c7071fc..4f762192a2a386f3c08c0d17e5704eccbf8f65e3 100644 (file)
@@ -47,6 +47,28 @@ $graph:
         "_type": "@id"
         refScope: 0
 
+- name: cwltool:TimeLimit
+  type: record
+  inVocab: false
+  extends: cwl:ProcessRequirement
+  doc: |
+    Set an upper limit on the execution time of a CommandLineTool or
+    ExpressionTool.  A tool execution which exceeds the time limit may
+    be preemptively terminated and considered failed.  May also be
+    used by batch systems to make scheduling decisions.
+  fields:
+    - name: class
+      type: string
+      doc: "Always 'TimeLimit'"
+      jsonldPredicate:
+        "_id": "@type"
+        "_type": "@vocab"
+    - name: timelimit
+      type: [long, string]
+      doc: |
+        The time limit, in seconds.  A time limit of zero means no
+        time limit.  Negative time limits are an error.
+
 - name: RunInSingleContainer
   type: record
   extends: cwl:ProcessRequirement
@@ -189,3 +211,25 @@ $graph:
         _type: "@vocab"
     - name: enableReuse
       type: boolean
+
+- name: WorkflowRunnerResources
+  type: record
+  extends: cwl:ProcessRequirement
+  inVocab: false
+  doc: |
+    Specify memory or core resource requests for the CWL runner process itself.
+  fields:
+    class:
+      type: string
+      doc: "Always 'arv:WorkflowRunnerResources'"
+      jsonldPredicate:
+        _id: "@type"
+        _type: "@vocab"
+    ramMin:
+      type: int?
+      doc: Minimum RAM, in mebibytes (2**20)
+      jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/ramMin"
+    coresMin:
+      type: int?
+      doc: Minimum cores allocated to cwl-runner
+      jsonldPredicate: "https://w3id.org/cwl/cwl#ResourceRequirement/coresMin"
\ No newline at end of file
index 667f9262f7f92f3c70c59fdad32a0c2a1412ca6a..948a9a46feab30bf3f8759fee94d81d14205e42d 100644 (file)
@@ -11,6 +11,7 @@ import datetime
 import ciso8601
 import uuid
 
+from arvados_cwl.util import get_current_container, get_intermediate_collection_info
 import ruamel.yaml as yaml
 
 from cwltool.errors import WorkflowException
@@ -165,8 +166,14 @@ class ArvadosContainer(JobBase):
 
                 keepemptydirs(vwd)
 
-                with Perf(metrics, "generatefiles.save_new %s" % self.name):
-                    vwd.save_new()
+                if not runtimeContext.current_container:
+                    runtimeContext.current_container = get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
+                info = get_intermediate_collection_info(self.name, runtimeContext.current_container, runtimeContext.intermediate_output_ttl)
+                vwd.save_new(name=info["name"],
+                             owner_uuid=self.arvrunner.project_uuid,
+                             ensure_unique_name=True,
+                             trash_at=info["trash_at"],
+                             properties=info["properties"])
 
                 prev = None
                 for f, p in sorteditems:
@@ -239,6 +246,10 @@ class ArvadosContainer(JobBase):
         if self.output_ttl < 0:
             raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])
 
+        if self.timelimit is not None:
+            scheduling_parameters["max_run_time"] = self.timelimit
+
+        container_request["output_name"] = "Output for step %s" % (self.name)
         container_request["output_ttl"] = self.output_ttl
         container_request["mounts"] = mounts
         container_request["secret_mounts"] = secret_mounts
@@ -382,7 +393,7 @@ class RunnerContainer(Runner):
             },
             "secret_mounts": secret_mounts,
             "runtime_constraints": {
-                "vcpus": 1,
+                "vcpus": self.submit_runner_cores,
                 "ram": 1024*1024 * self.submit_runner_ram,
                 "API": True
             },
index 70c2173db9fa2f7ff5054ff4be7252bd64156b67..1287fbb6eaf7b8387ca3fe700c7c97cf0678b867 100644 (file)
@@ -18,6 +18,7 @@ from cwltool.job import JobBase
 
 from schema_salad.sourceline import SourceLine
 
+from arvados_cwl.util import get_current_container, get_intermediate_collection_info
 import ruamel.yaml as yaml
 
 import arvados.collection
@@ -76,7 +77,14 @@ class ArvadosJob(JobBase):
 
                 if vwd:
                     with Perf(metrics, "generatefiles.save_new %s" % self.name):
-                        vwd.save_new()
+                        if not runtimeContext.current_container:
+                            runtimeContext.current_container = get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
+                        info = get_intermediate_collection_info(self.name, runtimeContext.current_container, runtimeContext.intermediate_output_ttl)
+                        vwd.save_new(name=info["name"],
+                                     owner_uuid=self.arvrunner.project_uuid,
+                                     ensure_unique_name=True,
+                                     trash_at=info["trash_at"],
+                                     properties=info["properties"])
 
                 for f, p in generatemapper.items():
                     if p.type == "File":
index b802e9e1656374565484737c2641b7e512679522..ae90625102ff155cd67daa44d4ab4384aa996866 100644 (file)
@@ -52,13 +52,24 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None,
     upload_dependencies(arvRunner, name, tool.doc_loader,
                         packed, tool.tool["id"], False)
 
-    # TODO nowhere for submit_runner_ram to go.
+    if submit_runner_ram:
+        hints = main.get("hints", [])
+        found = False
+        for h in hints:
+            if h["class"] == "http://arvados.org/cwl#WorkflowRunnerResources":
+                h["ramMin"] = submit_runner_ram
+                found = True
+                break
+        if not found:
+            hints.append({"class": "http://arvados.org/cwl#WorkflowRunnerResources",
+                          "ramMin": submit_runner_ram})
+        main["hints"] = hints
 
     body = {
         "workflow": {
             "name": name,
             "description": tool.tool.get("doc", ""),
-            "definition":yaml.round_trip_dump(packed)
+            "definition":json.dumps(packed, sort_keys=True, indent=4, separators=(',',': '))
         }}
     if project_uuid:
         body["workflow"]["owner_uuid"] = project_uuid
index cf0c1fb7e4576f9ef9f6d0f809e05b47e1186586..81e256ed545adbdf5e3be00eccdd108f65be26d4 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 from cwltool.context import LoadingContext, RuntimeContext
 
 class ArvLoadingContext(LoadingContext):
@@ -24,5 +28,6 @@ class ArvRuntimeContext(RuntimeContext):
         self.wait = True
         self.cwl_runner_job = None
         self.storage_classes = "default"
+        self.current_container = None
 
         super(ArvRuntimeContext, self).__init__(kwargs)
index 05a358e0d57a44f26cf6a4ec26c9d1bd35200163..d083b78f5a061906164a5978530af9230e767473 100644 (file)
@@ -8,11 +8,13 @@ import uuid
 import os
 import urllib
 
+from arvados_cwl.util import get_current_container, get_intermediate_collection_info
 import arvados.commands.run
 import arvados.collection
 
 from schema_salad.sourceline import SourceLine
 
+from arvados.errors import ApiError
 from cwltool.pathmapper import PathMapper, MapperEnt, abspath, adjustFileObjs, adjustDirObjs
 from cwltool.workflow import WorkflowException
 
@@ -153,9 +155,14 @@ class ArvPathMapper(PathMapper):
                 for l in srcobj.get("listing", []):
                     self.addentry(l, c, ".", remap)
 
-                check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
-                if not check["items"]:
-                    c.save_new(owner_uuid=self.arvrunner.project_uuid)
+                container = get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
+                info = get_intermediate_collection_info(None, container, self.arvrunner.intermediate_output_ttl)
+
+                c.save_new(name=info["name"],
+                           owner_uuid=self.arvrunner.project_uuid,
+                           ensure_unique_name=True,
+                           trash_at=info["trash_at"],
+                           properties=info["properties"])
 
                 ab = self.collection_pattern % c.portable_data_hash()
                 self._pathmap[srcobj["location"]] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
@@ -167,9 +174,14 @@ class ArvPathMapper(PathMapper):
                                                   num_retries=self.arvrunner.num_retries                                                  )
                 self.addentry(srcobj, c, ".", remap)
 
-                check = self.arvrunner.api.collections().list(filters=[["portable_data_hash", "=", c.portable_data_hash()]], limit=1).execute(num_retries=self.arvrunner.num_retries)
-                if not check["items"]:
-                    c.save_new(owner_uuid=self.arvrunner.project_uuid)
+                container = get_current_container(self.arvrunner.api, self.arvrunner.num_retries, logger)
+                info = get_intermediate_collection_info(None, container, self.arvrunner.intermediate_output_ttl)
+
+                c.save_new(name=info["name"],
+                           owner_uuid=self.arvrunner.project_uuid,
+                           ensure_unique_name=True,
+                           trash_at=info["trash_at"],
+                           properties=info["properties"])
 
                 ab = self.file_pattern % (c.portable_data_hash(), srcobj["basename"])
                 self._pathmap[srcobj["location"]] = MapperEnt("keep:%s/%s" % (c.portable_data_hash(), srcobj["basename"]),
@@ -202,6 +214,7 @@ class ArvPathMapper(PathMapper):
         else:
             return None
 
+
 class StagingPathMapper(PathMapper):
     _follow_dirs = True
 
index 12a847503b836d20e9a523ce28b2efb900023b10..3ad1aa6a704632a945b2ed059c10f40a87cdb578 100644 (file)
@@ -7,7 +7,7 @@ import urlparse
 from functools import partial
 import logging
 import json
-import subprocess
+import subprocess32 as subprocess
 from collections import namedtuple
 
 from StringIO import StringIO
@@ -377,13 +377,25 @@ class Runner(object):
         self.priority = priority
         self.secret_store = secret_store
 
+        self.submit_runner_cores = 1
+        self.submit_runner_ram = 1024  # default 1 GiB
+
+        runner_resource_req, _ = self.tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
+        if runner_resource_req:
+            if runner_resource_req.get("coresMin"):
+                self.submit_runner_cores = runner_resource_req["coresMin"]
+            if runner_resource_req.get("ramMin"):
+                self.submit_runner_ram = runner_resource_req["ramMin"]
+
         if submit_runner_ram:
+            # Command line / initializer overrides default and/or spec from workflow
             self.submit_runner_ram = submit_runner_ram
-        else:
-            self.submit_runner_ram = 3000
 
         if self.submit_runner_ram <= 0:
-            raise Exception("Value of --submit-runner-ram must be greater than zero")
+            raise Exception("Value of submit-runner-ram must be greater than zero")
+
+        if self.submit_runner_cores <= 0:
+            raise Exception("Value of submit-runner-cores must be greater than zero")
 
         self.merged_map = merged_map or {}
 
diff --git a/sdk/cwl/arvados_cwl/util.py b/sdk/cwl/arvados_cwl/util.py
new file mode 100644 (file)
index 0000000..98a2a89
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import datetime
+from arvados.errors import ApiError
+
+def get_intermediate_collection_info(workflow_step_name, current_container, intermediate_output_ttl):
+        if workflow_step_name:
+            name = "Intermediate collection for step %s" % (workflow_step_name)
+        else:
+            name = "Intermediate collection"
+        trash_time = None
+        if intermediate_output_ttl > 0:
+            trash_time = datetime.datetime.utcnow() + datetime.timedelta(seconds=intermediate_output_ttl)
+        container_uuid = None
+        if current_container:
+            container_uuid = current_container['uuid']
+        props = {"type": "intermediate", "container": container_uuid}
+
+        return {"name" : name, "trash_at" : trash_time, "properties" : props}
+
+def get_current_container(api, num_retries=0, logger=None):
+    current_container = None
+    try:
+        current_container = api.containers().current().execute(num_retries=num_retries)
+    except ApiError as e:
+        # Status code 404 just means we're not running in a container.
+        if e.resp.status != 404 and logger:
+            logger.info("Getting current container: %s", e)
+    return current_container
index 55548130d3277a10540e867a08f6a0dcfd3e48a9..0cab074d9a8a9755f941c6a59e226d4bd9d1e5f3 100644 (file)
@@ -39,12 +39,16 @@ setup(name='arvados-cwl-runner',
           'ruamel.yaml >=0.13.11, <0.15',
           'arvados-python-client>=1.1.4.20180607143841',
           'setuptools',
-          'ciso8601 >=1.0.6, <2.0.0'
+          'ciso8601 >=1.0.6, <2.0.0',
+          'subprocess32>=3.5.1',
       ],
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
       ],
       test_suite='tests',
-      tests_require=['mock>=1.0'],
+      tests_require=[
+          'mock>=1.0',
+          'subprocess32>=3.5.1',
+      ],
       zip_safe=True
       )
index 55944de21bb0e593e2acce192f36b725f3994216..9bf1c20aabc6591a4b1d00282e9c871456fca219 100644 (file)
@@ -2,48 +2,84 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-cwlVersion: v1.0
-$graph:
-- class: Workflow
-  inputs: []
-  outputs: []
-  steps:
-  - in: []
-    out: []
-    run: '#step1.cwl'
-    id: '#main/step1'
-  - in: []
-    out: []
-    run: '#step2.cwl'
-    id: '#main/step2'
-  id: '#main'
-- class: CommandLineTool
-  inputs:
-  - type: File
-    default:
-      class: File
-      location: keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt
-    id: '#step1.cwl/a'
-  - type: File
-    default:
-      class: File
-      location: keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt
-    id: '#step1.cwl/b'
-  outputs: []
-  arguments: [echo, $(inputs.a), $(inputs.b)]
-  id: '#step1.cwl'
-- class: CommandLineTool
-  inputs:
-  - type: File
-    default:
-      class: File
-      location: keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt
-    id: '#step2.cwl/b'
-  - type: File
-    default:
-      class: File
-      location: keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt
-    id: '#step2.cwl/c'
-  outputs: []
-  arguments: [echo, $(inputs.c), $(inputs.b)]
-  id: '#step2.cwl'
+{
+    "$graph": [
+        {
+            "class": "Workflow",
+            "id": "#main",
+            "inputs": [],
+            "outputs": [],
+            "steps": [
+                {
+                    "id": "#main/step1",
+                    "in": [],
+                    "out": [],
+                    "run": "#step1.cwl"
+                },
+                {
+                    "id": "#main/step2",
+                    "in": [],
+                    "out": [],
+                    "run": "#step2.cwl"
+                }
+            ]
+        },
+        {
+            "arguments": [
+                "echo",
+                "$(inputs.a)",
+                "$(inputs.b)"
+            ],
+            "class": "CommandLineTool",
+            "id": "#step1.cwl",
+            "inputs": [
+                {
+                    "default": {
+                        "class": "File",
+                        "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/a.txt"
+                    },
+                    "id": "#step1.cwl/a",
+                    "type": "File"
+                },
+                {
+                    "default": {
+                        "class": "File",
+                        "location": "keep:b9fca8bf06b170b8507b80b2564ee72b+57/b.txt"
+                    },
+                    "id": "#step1.cwl/b",
+                    "type": "File"
+                }
+            ],
+            "outputs": []
+        },
+        {
+            "arguments": [
+                "echo",
+                "$(inputs.c)",
+                "$(inputs.b)"
+            ],
+            "class": "CommandLineTool",
+            "id": "#step2.cwl",
+            "inputs": [
+                {
+                    "default": {
+                        "class": "File",
+                        "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/b.txt"
+                    },
+                    "id": "#step2.cwl/b",
+                    "type": "File"
+                },
+                {
+                    "default": {
+                        "class": "File",
+                        "location": "keep:8e2d09a066d96cdffdd2be41579e4e2e+57/c.txt"
+                    },
+                    "id": "#step2.cwl/c",
+                    "type": "File"
+                }
+            ],
+            "outputs": []
+        }
+    ],
+    "cwlVersion": "v1.0"
+}
\ No newline at end of file
diff --git a/sdk/cwl/tests/makes_intermediates/echo.cwl b/sdk/cwl/tests/makes_intermediates/echo.cwl
new file mode 100644 (file)
index 0000000..5449bc3
--- /dev/null
@@ -0,0 +1,14 @@
+class: CommandLineTool
+cwlVersion: v1.0
+requirements:
+  InitialWorkDirRequirement:
+    listing:
+      - $(inputs.inp1)
+      - $(inputs.inp2)
+      - $(inputs.inp3)
+inputs:
+  inp1: File
+  inp2: [File, Directory]
+  inp3: Directory
+outputs: []
+arguments: [echo, $(inputs.inp1), $(inputs.inp2), $(inputs.inp3)]
diff --git a/sdk/cwl/tests/makes_intermediates/hello1.txt b/sdk/cwl/tests/makes_intermediates/hello1.txt
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/cwl/tests/makes_intermediates/run_in_single.cwl b/sdk/cwl/tests/makes_intermediates/run_in_single.cwl
new file mode 100644 (file)
index 0000000..bb596b2
--- /dev/null
@@ -0,0 +1,38 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+requirements:
+  SubworkflowFeatureRequirement: {}
+inputs:
+  inp1:
+    type: File
+    default:
+      class: File
+      location: hello1.txt
+  inp2:
+    type: [File, Directory]
+    default:
+      class: File
+      basename: "hello2.txt"
+      contents: "Hello world"
+  inp3:
+    type: [File, Directory]
+    default:
+      class: Directory
+      basename: inp3
+      listing:
+        - class: File
+          basename: "hello3.txt"
+          contents: "hello world"
+outputs: []
+steps:
+  step1:
+    requirements:
+      arv:RunInSingleContainer: {}
+    in:
+      inp1: inp1
+      inp2: inp2
+      inp3: inp3
+    out: []
+    run: subwf.cwl
diff --git a/sdk/cwl/tests/makes_intermediates/subwf.cwl b/sdk/cwl/tests/makes_intermediates/subwf.cwl
new file mode 100644 (file)
index 0000000..1852ab4
--- /dev/null
@@ -0,0 +1,15 @@
+cwlVersion: v1.0
+class: Workflow
+inputs:
+  inp1: File
+  inp2: File
+  inp3: Directory
+outputs: []
+steps:
+  step1:
+    in:
+      inp1: inp1
+      inp2: inp2
+      inp3: inp3
+    out: []
+    run: echo.cwl
index af6f7721fe82eb3ec0ffa943a0f191391aac75e2..ae234414a3df90888cfbe9028c06aa5efbba9f55 100644 (file)
@@ -21,7 +21,6 @@ if not os.getenv('ARVADOS_DEBUG'):
     logging.getLogger('arvados.cwl-runner').setLevel(logging.WARN)
     logging.getLogger('arvados.arv-run').setLevel(logging.WARN)
 
-
 class TestContainer(unittest.TestCase):
 
     def helper(self, runner, enable_reuse=True):
@@ -100,6 +99,7 @@ class TestContainer(unittest.TestCase):
                                                "capacity": 1073741824 }
                         },
                         'state': 'Committed',
+                        'output_name': 'Output for step test_run_'+str(enable_reuse),
                         'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                         'output_path': '/var/spool/cwl',
                         'output_ttl': 0,
@@ -186,6 +186,7 @@ class TestContainer(unittest.TestCase):
                                    "capacity": 5242880000 }
             },
             'state': 'Committed',
+            'output_name': 'Output for step test_resource_requirements',
             'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
             'output_path': '/var/spool/cwl',
             'output_ttl': 7200,
@@ -318,6 +319,7 @@ class TestContainer(unittest.TestCase):
                 }
             },
             'state': 'Committed',
+            'output_name': 'Output for step test_initial_work_dir',
             'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
             'output_path': '/var/spool/cwl',
             'output_ttl': 0,
@@ -405,6 +407,7 @@ class TestContainer(unittest.TestCase):
                         },
                     },
                     'state': 'Committed',
+                    "output_name": "Output for step test_run_redirect",
                     'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                     'output_path': '/var/spool/cwl',
                     'output_ttl': 0,
@@ -541,6 +544,7 @@ class TestContainer(unittest.TestCase):
                                            "capacity": 1073741824 }
                     },
                     'state': 'Committed',
+                    'output_name': 'Output for step test_run_mounts',
                     'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                     'output_path': '/var/spool/cwl',
                     'output_ttl': 0,
@@ -633,6 +637,7 @@ class TestContainer(unittest.TestCase):
                                            "capacity": 1073741824 }
                     },
                     'state': 'Committed',
+                    'output_name': 'Output for step test_secrets',
                     'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
                     'output_path': '/var/spool/cwl',
                     'output_ttl': 0,
@@ -648,3 +653,46 @@ class TestContainer(unittest.TestCase):
                         }
                     }
                 }))
+
+    # The test passes no builder.resources
+    # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+    @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+    def test_timelimit(self, keepdocker):
+        arv_docker_clear_cache()
+
+        runner = mock.MagicMock()
+        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
+        runner.ignore_docker_for_reuse = False
+        runner.intermediate_output_ttl = 0
+        runner.secret_store = cwltool.secrets.SecretStore()
+
+        keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
+        runner.api.collections().get().execute.return_value = {
+            "portable_data_hash": "99999999999999999999999999999993+99"}
+
+        tool = cmap({
+            "inputs": [],
+            "outputs": [],
+            "baseCommand": "ls",
+            "arguments": [{"valueFrom": "$(runtime.outdir)"}],
+            "id": "#",
+            "class": "CommandLineTool",
+            "hints": [
+                {
+                    "class": "http://commonwl.org/cwltool#TimeLimit",
+                    "timelimit": 42
+                }
+            ]
+        })
+
+        loadingContext, runtimeContext = self.helper(runner)
+        runtimeContext.name = "test_timelimit"
+
+        arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
+        arvtool.formatgraph = None
+
+        for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+            j.run(runtimeContext)
+
+        _, kwargs = runner.api.container_requests().create.call_args
+        self.assertEqual(42, kwargs['body']['scheduling_parameters'].get('max_run_time'))
index 8b7e4af81627ac6538783d0e924450ea994d5976..cd46251300dfb95862cb7957f510e108dd78b281 100644 (file)
@@ -1131,6 +1131,42 @@ class TestSubmit(unittest.TestCase):
                          stubs.expect_container_request_uuid + '\n')
 
 
+    @stubs
+    def test_submit_wf_runner_resources(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+        try:
+            exited = arvados_cwl.main(
+                ["--submit", "--no-wait", "--api=containers", "--debug",
+                 "tests/wf/submit_wf_runner_resources.cwl", "tests/submit_test_job.json"],
+                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            self.assertEqual(exited, 0)
+        except:
+            logging.exception("")
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        expect_container["runtime_constraints"] = {
+            "API": True,
+            "vcpus": 2,
+            "ram": 2000 * 2**20
+        }
+        expect_container["name"] = "submit_wf_runner_resources.cwl"
+        expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][1]["hints"] = [
+            {
+                "class": "http://arvados.org/cwl#WorkflowRunnerResources",
+                "coresMin": 2,
+                "ramMin": 2000
+            }
+        ]
+        expect_container["mounts"]["/var/lib/cwl/workflow.json"]["content"]["$graph"][0]["$namespaces"] = {
+            "arv": "http://arvados.org/cwl#",
+        }
+
+        stubs.api.container_requests().create.assert_called_with(
+            body=JsonDiffMatcher(expect_container))
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+
+
     @mock.patch("arvados.commands.keepdocker.find_one_image_hash")
     @mock.patch("cwltool.docker.DockerCommandLineJob.get_image")
     @mock.patch("arvados.api")
diff --git a/sdk/cwl/tests/test_util.py b/sdk/cwl/tests/test_util.py
new file mode 100644 (file)
index 0000000..2532bd5
--- /dev/null
@@ -0,0 +1,45 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import unittest
+import mock
+import datetime
+import httplib2
+
+from arvados_cwl.util import *
+from arvados.errors import ApiError
+
+class MockDateTime(datetime.datetime):
+    @classmethod
+    def utcnow(cls):
+        return datetime.datetime(2018, 1, 1, 0, 0, 0, 0)
+
+datetime.datetime = MockDateTime
+
+class TestUtil(unittest.TestCase):
+    def test_get_intermediate_collection_info(self):
+        name = "one"
+        current_container = {"uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"}
+        intermediate_output_ttl = 120
+
+        info = get_intermediate_collection_info(name, current_container, intermediate_output_ttl)
+
+        self.assertEqual(info["name"], "Intermediate collection for step one")
+        self.assertEqual(info["trash_at"], datetime.datetime(2018, 1, 1, 0, 2, 0, 0))
+        self.assertEqual(info["properties"], {"type" : "intermediate", "container" : "zzzzz-8i9sb-zzzzzzzzzzzzzzz"})
+
+    def test_get_current_container_success(self):
+        api = mock.MagicMock()
+        api.containers().current().execute.return_value = {"uuid" : "zzzzz-8i9sb-zzzzzzzzzzzzzzz"}
+
+        current_container = get_current_container(api)
+
+        self.assertEqual(current_container, {"uuid" : "zzzzz-8i9sb-zzzzzzzzzzzzzzz"})
+
+    def test_get_current_container_error(self):
+        api = mock.MagicMock()
+        api.containers().current().execute.side_effect = ApiError(httplib2.Response({"status": 300}), "")
+        logger = mock.MagicMock()
+
+        self.assertRaises(ApiError, get_current_container, api, num_retries=0, logger=logger)
index 7def3e639bfc49f83d2f321b01dfe60fbe9b4711..7b3b4503efc239661f5b03b2afb0cfac3ca8cc4d 100644 (file)
@@ -2,43 +2,91 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-cwlVersion: v1.0
-$graph:
-- class: CommandLineTool
-  requirements:
-  - class: DockerRequirement
-    dockerPull: debian:8
-  inputs:
-  - id: '#submit_tool.cwl/x'
-    type: File
-    default:
-      class: File
-      location: keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt
-    inputBinding:
-      position: 1
-  outputs: []
-  baseCommand: cat
-  id: '#submit_tool.cwl'
-- class: Workflow
-  inputs:
-  - id: '#main/x'
-    type: File
-    default: {class: File, location: keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt,
-      size: 16, basename: blorp.txt, nameroot: blorp, nameext: .txt}
-  - id: '#main/y'
-    type: Directory
-    default: {class: Directory, location: keep:99999999999999999999999999999998+99,
-      basename: 99999999999999999999999999999998+99}
-  - id: '#main/z'
-    type: Directory
-    default: {class: Directory, basename: anonymous, listing: [{basename: renamed.txt,
-          class: File, location: keep:99999999999999999999999999999998+99/file1.txt,
-          nameroot: renamed, nameext: .txt}]}
-  outputs: []
-  steps:
-  - id: '#main/step1'
-    in:
-    - {id: '#main/step1/x', source: '#main/x'}
-    out: []
-    run: '#submit_tool.cwl'
-  id: '#main'
+{
+    "$graph": [
+        {
+            "baseCommand": "cat",
+            "class": "CommandLineTool",
+            "id": "#submit_tool.cwl",
+            "inputs": [
+                {
+                    "default": {
+                        "class": "File",
+                        "location": "keep:5d373e7629203ce39e7c22af98a0f881+52/blub.txt"
+                    },
+                    "id": "#submit_tool.cwl/x",
+                    "inputBinding": {
+                        "position": 1
+                    },
+                    "type": "File"
+                }
+            ],
+            "outputs": [],
+            "requirements": [
+                {
+                    "class": "DockerRequirement",
+                    "dockerPull": "debian:8"
+                }
+            ]
+        },
+        {
+            "class": "Workflow",
+            "id": "#main",
+            "inputs": [
+                {
+                    "default": {
+                        "basename": "blorp.txt",
+                        "class": "File",
+                        "location": "keep:169f39d466a5438ac4a90e779bf750c7+53/blorp.txt",
+                        "nameext": ".txt",
+                        "nameroot": "blorp",
+                        "size": 16
+                    },
+                    "id": "#main/x",
+                    "type": "File"
+                },
+                {
+                    "default": {
+                        "basename": "99999999999999999999999999999998+99",
+                        "class": "Directory",
+                        "location": "keep:99999999999999999999999999999998+99"
+                    },
+                    "id": "#main/y",
+                    "type": "Directory"
+                },
+                {
+                    "default": {
+                        "basename": "anonymous",
+                        "class": "Directory",
+                        "listing": [
+                            {
+                                "basename": "renamed.txt",
+                                "class": "File",
+                                "location": "keep:99999999999999999999999999999998+99/file1.txt",
+                                "nameext": ".txt",
+                                "nameroot": "renamed"
+                            }
+                        ]
+                    },
+                    "id": "#main/z",
+                    "type": "Directory"
+                }
+            ],
+            "outputs": [],
+            "steps": [
+                {
+                    "id": "#main/step1",
+                    "in": [
+                        {
+                            "id": "#main/step1/x",
+                            "source": "#main/x"
+                        }
+                    ],
+                    "out": [],
+                    "run": "#submit_tool.cwl"
+                }
+            ]
+        }
+    ],
+    "cwlVersion": "v1.0"
+}
\ No newline at end of file
diff --git a/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl b/sdk/cwl/tests/wf/submit_wf_runner_resources.cwl
new file mode 100644 (file)
index 0000000..9e27121
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Test case for arvados-cwl-runner
+#
+# Used to test whether scanning a workflow file for dependencies
+# (e.g. submit_tool.cwl) and uploading to Keep works as intended.
+
+class: Workflow
+cwlVersion: v1.0
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+hints:
+  arv:WorkflowRunnerResources:
+    ramMin: 2000
+    coresMin: 2
+inputs:
+  - id: x
+    type: File
+  - id: y
+    type: Directory
+  - id: z
+    type: Directory
+outputs: []
+steps:
+  - id: step1
+    in:
+      - { id: x, source: "#x" }
+    out: []
+    run: ../tool/submit_tool.cwl
diff --git a/sdk/go/arvados/byte_size.go b/sdk/go/arvados/byte_size.go
new file mode 100644 (file)
index 0000000..08cc83e
--- /dev/null
@@ -0,0 +1,91 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+       "encoding/json"
+       "fmt"
+       "math"
+       "strings"
+)
+
+type ByteSize int64
+
+var prefixValue = map[string]int64{
+       "":   1,
+       "K":  1000,
+       "Ki": 1 << 10,
+       "M":  1000000,
+       "Mi": 1 << 20,
+       "G":  1000000000,
+       "Gi": 1 << 30,
+       "T":  1000000000000,
+       "Ti": 1 << 40,
+       "P":  1000000000000000,
+       "Pi": 1 << 50,
+       "E":  1000000000000000000,
+       "Ei": 1 << 60,
+}
+
+func (n *ByteSize) UnmarshalJSON(data []byte) error {
+       if len(data) == 0 || data[0] != '"' {
+               var i int64
+               err := json.Unmarshal(data, &i)
+               if err != nil {
+                       return err
+               }
+               *n = ByteSize(i)
+               return nil
+       }
+       var s string
+       err := json.Unmarshal(data, &s)
+       if err != nil {
+               return err
+       }
+       split := strings.LastIndexAny(s, "0123456789.+-eE") + 1
+       if split == 0 {
+               return fmt.Errorf("invalid byte size %q", s)
+       }
+       if s[split-1] == 'E' {
+               // We accepted an E as if it started the exponent part
+               // of a json number, but if the next char isn't +, -,
+               // or digit, then the E must have meant Exa. Instead
+               // of "4.5E"+"iB" we want "4.5"+"EiB".
+               split--
+       }
+       var val json.Number
+       dec := json.NewDecoder(strings.NewReader(s[:split]))
+       dec.UseNumber()
+       err = dec.Decode(&val)
+       if err != nil {
+               return err
+       }
+       if split == len(s) {
+               return nil
+       }
+       prefix := strings.Trim(s[split:], " ")
+       if strings.HasSuffix(prefix, "B") {
+               prefix = prefix[:len(prefix)-1]
+       }
+       pval, ok := prefixValue[prefix]
+       if !ok {
+               return fmt.Errorf("invalid unit %q", strings.Trim(s[split:], " "))
+       }
+       if intval, err := val.Int64(); err == nil {
+               if pval > 1 && (intval*pval)/pval != intval {
+                       return fmt.Errorf("size %q overflows int64", s)
+               }
+               *n = ByteSize(intval * pval)
+               return nil
+       } else if floatval, err := val.Float64(); err == nil {
+               if floatval*float64(pval) > math.MaxInt64 {
+                       return fmt.Errorf("size %q overflows int64", s)
+               }
+               *n = ByteSize(int64(floatval * float64(pval)))
+               return nil
+       } else {
+               return fmt.Errorf("bug: json.Number for %q is not int64 or float64: %s", s, err)
+       }
+}
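
The trailing-"E" special case above exists because a string like "4.5EiB" would otherwise be split into the exponent "4.5E" and an unknown unit "iB". A minimal sketch of how the new type behaves when decoding plain JSON (the import path is assumed to be the Go SDK's usual one):

    package main

    import (
        "encoding/json"
        "fmt"

        "git.curoverse.com/arvados.git/sdk/go/arvados"
    )

    func main() {
        // Quoted strings go through the prefix parser; bare numbers are
        // taken as a byte count directly.
        for _, in := range []string{`"4 KiB"`, `"1.5 GB"`, `"4.5EiB"`, `"1e2 KB"`, `12345`} {
            var n arvados.ByteSize
            if err := json.Unmarshal([]byte(in), &n); err != nil {
                fmt.Printf("%-10s => error: %v\n", in, err)
                continue
            }
            fmt.Printf("%-10s => %d\n", in, int64(n)) // e.g. "4 KiB" => 4096
        }
    }
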
diff --git a/sdk/go/arvados/byte_size_test.go b/sdk/go/arvados/byte_size_test.go
new file mode 100644 (file)
index 0000000..7c4aff2
--- /dev/null
@@ -0,0 +1,70 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+       "github.com/ghodss/yaml"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&ByteSizeSuite{})
+
+type ByteSizeSuite struct{}
+
+func (s *ByteSizeSuite) TestUnmarshal(c *check.C) {
+       for _, testcase := range []struct {
+               in  string
+               out int64
+       }{
+               {"0", 0},
+               {"5", 5},
+               {"5B", 5},
+               {"5 B", 5},
+               {" 4 KiB ", 4096},
+               {"0K", 0},
+               {"0Ki", 0},
+               {"0 KiB", 0},
+               {"4K", 4000},
+               {"4KB", 4000},
+               {"4Ki", 4096},
+               {"4KiB", 4096},
+               {"4MB", 4000000},
+               {"4MiB", 4194304},
+               {"4GB", 4000000000},
+               {"4 GiB", 4294967296},
+               {"4TB", 4000000000000},
+               {"4TiB", 4398046511104},
+               {"4PB", 4000000000000000},
+               {"4PiB", 4503599627370496},
+               {"4EB", 4000000000000000000},
+               {"4EiB", 4611686018427387904},
+               {"4.5EiB", 5188146770730811392},
+               {"1.5 GB", 1500000000},
+               {"1.5 GiB", 1610612736},
+               {"1.234 GiB", 1324997410}, // rounds down from 1324997410.816
+               {"1e2 KB", 100000},
+               {"20E-1 KiB", 2048},
+               {"1E0EB", 1000000000000000000},
+               {"1E-1EB", 100000000000000000},
+               {"1E-1EiB", 115292150460684704},
+               {"4.5E15 K", 4500000000000000000},
+       } {
+               var n ByteSize
+               err := yaml.Unmarshal([]byte(testcase.in+"\n"), &n)
+               c.Logf("%v => %v: %v", testcase.in, testcase.out, n)
+               c.Check(err, check.IsNil)
+               c.Check(int64(n), check.Equals, testcase.out)
+       }
+       for _, testcase := range []string{
+               "B", "K", "KB", "KiB", "4BK", "4iB", "4A", "b", "4b", "4mB", "4m", "4mib", "4KIB", "4K iB", "4Ki B", "BB", "4BB",
+               "400000 EB", // overflows int64
+               "4.11e4 EB", // ok as float64, but overflows int64
+       } {
+               var n ByteSize
+               err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
+               c.Logf("%v => error: %v", n, err)
+               c.Check(err, check.NotNil)
+       }
+}
index 182cf8433b2ef0152381b7b7b0acab263685e807..353901855683f296811a42e64b008568071dbdad 100644 (file)
@@ -5,6 +5,8 @@
 package arvados
 
 import (
+       "encoding/json"
+       "errors"
        "fmt"
        "os"
 
@@ -52,7 +54,7 @@ type Cluster struct {
        ClusterID          string `json:"-"`
        ManagementToken    string
        NodeProfiles       map[string]NodeProfile
-       InstanceTypes      []InstanceType
+       InstanceTypes      InstanceTypeMap
        HTTPRequestTimeout Duration
 }
 
@@ -60,12 +62,52 @@ type InstanceType struct {
        Name         string
        ProviderType string
        VCPUs        int
-       RAM          int64
-       Scratch      int64
+       RAM          ByteSize
+       Scratch      ByteSize
        Price        float64
        Preemptible  bool
 }
 
+type InstanceTypeMap map[string]InstanceType
+
+var errDuplicateInstanceTypeName = errors.New("duplicate instance type name")
+
+// UnmarshalJSON handles old config files that provide an array of
+// instance types instead of a hash.
+func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
+       if len(data) > 0 && data[0] == '[' {
+               var arr []InstanceType
+               err := json.Unmarshal(data, &arr)
+               if err != nil {
+                       return err
+               }
+               if len(arr) == 0 {
+                       *it = nil
+                       return nil
+               }
+               *it = make(map[string]InstanceType, len(arr))
+               for _, t := range arr {
+                       if _, ok := (*it)[t.Name]; ok {
+                               return errDuplicateInstanceTypeName
+                       }
+                       (*it)[t.Name] = t
+               }
+               return nil
+       }
+       var hash map[string]InstanceType
+       err := json.Unmarshal(data, &hash)
+       if err != nil {
+               return err
+       }
+       // Fill in Name field using hash key.
+       *it = InstanceTypeMap(hash)
+       for name, t := range *it {
+               t.Name = name
+               (*it)[name] = t
+       }
+       return nil
+}
+
 // GetNodeProfile returns a NodeProfile for the given hostname. An
 // error is returned if the appropriate configuration can't be
 // determined (e.g., this does not appear to be a system node). If
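
A sketch of the backward-compatibility behavior, including the duplicate-name rejection that the new tests below do not exercise (the instance type names here are invented for illustration, and the import path is assumed):

    package main

    import (
        "encoding/json"
        "fmt"

        "git.curoverse.com/arvados.git/sdk/go/arvados"
    )

    func main() {
        var m arvados.InstanceTypeMap

        // Legacy array form: converted into a map keyed by Name.
        legacy := `[{"Name": "m4.large", "VCPUs": 2}, {"Name": "m4.xlarge", "VCPUs": 4}]`
        fmt.Println(json.Unmarshal([]byte(legacy), &m), m["m4.xlarge"].VCPUs) // <nil> 4

        // Duplicate names in the legacy form are rejected.
        dup := `[{"Name": "m4.large"}, {"Name": "m4.large"}]`
        fmt.Println(json.Unmarshal([]byte(dup), &m)) // duplicate instance type name

        // New hash form: the Name field is filled in from the map key.
        hash := `{"m4.large": {"VCPUs": 2}}`
        fmt.Println(json.Unmarshal([]byte(hash), &m), m["m4.large"].Name) // <nil> m4.large
    }
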
diff --git a/sdk/go/arvados/config_test.go b/sdk/go/arvados/config_test.go
new file mode 100644 (file)
index 0000000..59c7432
--- /dev/null
@@ -0,0 +1,37 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+       "github.com/ghodss/yaml"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&ConfigSuite{})
+
+type ConfigSuite struct{}
+
+func (s *ConfigSuite) TestInstanceTypesAsArray(c *check.C) {
+       var cluster Cluster
+       yaml.Unmarshal([]byte("InstanceTypes:\n- Name: foo\n"), &cluster)
+       c.Check(len(cluster.InstanceTypes), check.Equals, 1)
+       c.Check(cluster.InstanceTypes["foo"].Name, check.Equals, "foo")
+}
+
+func (s *ConfigSuite) TestInstanceTypesAsHash(c *check.C) {
+       var cluster Cluster
+       yaml.Unmarshal([]byte("InstanceTypes:\n  foo:\n    ProviderType: bar\n"), &cluster)
+       c.Check(len(cluster.InstanceTypes), check.Equals, 1)
+       c.Check(cluster.InstanceTypes["foo"].Name, check.Equals, "foo")
+       c.Check(cluster.InstanceTypes["foo"].ProviderType, check.Equals, "bar")
+}
+
+func (s *ConfigSuite) TestInstanceTypeSize(c *check.C) {
+       var it InstanceType
+       err := yaml.Unmarshal([]byte("Name: foo\nScratch: 4GB\nRAM: 4GiB\n"), &it)
+       c.Check(err, check.IsNil)
+       c.Check(int64(it.Scratch), check.Equals, int64(4000000000))
+       c.Check(int64(it.RAM), check.Equals, int64(4294967296))
+}
index 5398d9d74128cd1d941194d37b3ba10eb71a5942..210ed9981c07292ec3c1508da978eaac351acae7 100644 (file)
@@ -54,6 +54,7 @@ type RuntimeConstraints struct {
 type SchedulingParameters struct {
        Partitions  []string `json:"partitions"`
        Preemptible bool     `json:"preemptible"`
+       MaxRunTime  int      `json:"max_run_time"`
 }
 
 // ContainerList is an arvados#containerList resource.
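
The added field serializes as "max_run_time", the same key asserted by the TimeLimit test above and validated by the API server's scheduling_parameters check further below; a minimal sketch, assuming the SDK's usual import path:

    package main

    import (
        "encoding/json"
        "fmt"

        "git.curoverse.com/arvados.git/sdk/go/arvados"
    )

    func main() {
        // MaxRunTime is expressed in seconds of elapsed runtime (86400 = one day).
        sp := arvados.SchedulingParameters{Preemptible: true, MaxRunTime: 42}
        buf, _ := json.Marshal(sp)
        fmt.Println(string(buf)) // {"partitions":null,"preemptible":true,"max_run_time":42}
    }
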
index bb97f3c1d8186adb0da84f541997157f149c0c1a..c8c70298077092ea8c0b14707e6e6f8563ab2411 100644 (file)
@@ -18,9 +18,7 @@ import os
 import pprint
 import re
 import string
-import subprocess
 import sys
-import threading
 import time
 import types
 import zlib
index 4611a1aadf80043eb9afdeeaff727b27a09eecbc..b652db77d18a73214740672da6588f0fbaab3de3 100644 (file)
@@ -96,6 +96,10 @@ def _intercept_http_request(self, uri, method="GET", headers={}, **kwargs):
                           delay, exc_info=True)
             for conn in self.connections.values():
                 conn.close()
+        except httplib2.SSLHandshakeError as e:
+            # Intercept and re-raise with a better error message.
+            raise httplib2.SSLHandshakeError("Could not connect to %s\n%s\nPossible causes: remote SSL/TLS certificate expired, or was issued by an untrusted certificate authority." % (uri, e))
+
         time.sleep(delay)
         delay = delay * self._retry_delay_backoff
 
@@ -254,9 +258,12 @@ def api_from_config(version=None, apiconfig=None, **kwargs):
     if apiconfig is None:
         apiconfig = config.settings()
 
+    errors = []
     for x in ['ARVADOS_API_HOST', 'ARVADOS_API_TOKEN']:
         if x not in apiconfig:
-            raise ValueError("%s is not set. Aborting." % x)
+            errors.append(x)
+    if errors:
+        raise ValueError(" and ".join(errors)+" not set.\nPlease set in %s or export environment variable." % config.default_config_file)
     host = apiconfig.get('ARVADOS_API_HOST')
     token = apiconfig.get('ARVADOS_API_TOKEN')
     insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE', apiconfig)
index fe6beab510915dd85b8b29b0c528fb59d9936569..55fc6b626d5de9798b989d2b252d158ca89baab6 100644 (file)
@@ -10,7 +10,7 @@ import errno
 import json
 import os
 import re
-import subprocess
+import subprocess32 as subprocess
 import sys
 import tarfile
 import tempfile
index e8e95afc7013650c67e753a3f2de4e7ec227fc44..71e101cf4c5073d40e78f73c0bf46a9ff231f937 100644 (file)
@@ -292,7 +292,8 @@ class KeepClient(object):
         def __init__(self, root, user_agent_pool=queue.LifoQueue(),
                      upload_counter=None,
                      download_counter=None,
-                     headers={}):
+                     headers={},
+                     insecure=False):
             self.root = root
             self._user_agent_pool = user_agent_pool
             self._result = {'error': None}
@@ -304,6 +305,7 @@ class KeepClient(object):
             self.put_headers = headers
             self.upload_counter = upload_counter
             self.download_counter = download_counter
+            self.insecure = insecure
 
         def usable(self):
             """Is it worth attempting a request?"""
@@ -371,6 +373,8 @@ class KeepClient(object):
                         '{}: {}'.format(k,v) for k,v in self.get_headers.items()])
                     curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
                     curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                    if self.insecure:
+                        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
                     if method == "HEAD":
                         curl.setopt(pycurl.NOBODY, True)
                     self._setcurltimeouts(curl, timeout)
@@ -463,6 +467,8 @@ class KeepClient(object):
                         '{}: {}'.format(k,v) for k,v in self.put_headers.items()])
                     curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
                     curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                    if self.insecure:
+                        curl.setopt(pycurl.SSL_VERIFYPEER, 0)
                     self._setcurltimeouts(curl, timeout)
                     try:
                         curl.perform()
@@ -762,6 +768,11 @@ class KeepClient(object):
         if local_store is None:
             local_store = os.environ.get('KEEP_LOCAL_STORE')
 
+        if api_client is None:
+            self.insecure = config.flag_is_true('ARVADOS_API_HOST_INSECURE')
+        else:
+            self.insecure = api_client.insecure
+
         self.block_cache = block_cache if block_cache else KeepBlockCache()
         self.timeout = timeout
         self.proxy_timeout = proxy_timeout
@@ -934,7 +945,8 @@ class KeepClient(object):
                     root, self._user_agent_pool,
                     upload_counter=self.upload_counter,
                     download_counter=self.download_counter,
-                    headers=headers)
+                    headers=headers,
+                    insecure=self.insecure)
         return local_roots
 
     @staticmethod
@@ -1035,7 +1047,8 @@ class KeepClient(object):
                 root: self.KeepService(root, self._user_agent_pool,
                                        upload_counter=self.upload_counter,
                                        download_counter=self.download_counter,
-                                       headers=headers)
+                                       headers=headers,
+                                       insecure=self.insecure)
                 for root in hint_roots
             }
 
index b12c121bf8d3f1dbd42f9e7ed0219d1e83583697..c6e17cae0b71a4ca0b580bbb6f8c056da8cb8988 100644 (file)
@@ -26,6 +26,12 @@ class ThreadSafeApiCache(object):
         self.apiconfig = copy.copy(apiconfig)
         self.api_params = api_params
         self.local = threading.local()
+
+        # Initialize an API object for this thread before creating
+        # KeepClient; this will report an error if ARVADOS_API_HOST or
+        # ARVADOS_API_TOKEN is missing.
+        self.localapi()
+
         self.keep = keep.KeepClient(api_client=self, **keep_params)
 
     def localapi(self):
index 6e3f59dd7ede16fee8eb1da387fa93a8ec83fc4d..5e066f014598560ed211a215ef8866150a77bac3 100644 (file)
@@ -54,6 +54,7 @@ setup(name='arvados-python-client',
           'ruamel.yaml >=0.13.11, <0.15',
           'setuptools',
           'ws4py <0.4',
+          'subprocess32>=3.5.1',
       ],
       test_suite='tests',
       tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
index ce1929fdf710b5960df392cfe348016b4309becb..c21ef95f2af3a18ea8f48352a9e2b780ea1b0e1f 100644 (file)
@@ -7,18 +7,25 @@ error_log "{{ERRORLOG}}" info;          # Yes, must be specified here _and_ cmdl
 events {
 }
 http {
-  access_log "{{ACCESSLOG}}" combined;
+  log_format customlog
+    '[$time_local] $server_name $status $body_bytes_sent $request_time $request_method "$scheme://$http_host$request_uri" $remote_addr:$remote_port '
+    '"$http_referer" "$http_user_agent"';
+  access_log "{{ACCESSLOG}}" customlog;
   client_body_temp_path "{{TMPDIR}}";
   upstream arv-git-http {
     server localhost:{{GITPORT}};
   }
   server {
     listen *:{{GITSSLPORT}} ssl default_server;
-    server_name _;
+    server_name arv-git-http;
     ssl_certificate "{{SSLCERT}}";
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://arv-git-http;
+      proxy_set_header Host $http_host;
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+      proxy_set_header X-Forwarded-Proto https;
+      proxy_redirect off;
     }
   }
   upstream keepproxy {
@@ -26,11 +33,15 @@ http {
   }
   server {
     listen *:{{KEEPPROXYSSLPORT}} ssl default_server;
-    server_name _;
+    server_name keepproxy;
     ssl_certificate "{{SSLCERT}}";
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://keepproxy;
+      proxy_set_header Host $http_host;
+      proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+      proxy_set_header X-Forwarded-Proto https;
+      proxy_redirect off;
     }
   }
   upstream keep-web {
@@ -38,25 +49,44 @@ http {
   }
   server {
     listen *:{{KEEPWEBSSLPORT}} ssl default_server;
-    server_name ~^(?<request_host>.*)$;
+    server_name keep-web;
     ssl_certificate "{{SSLCERT}}";
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://keep-web;
-      proxy_set_header Host $request_host:{{KEEPWEBPORT}};
+      proxy_set_header Host $http_host;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+      proxy_set_header X-Forwarded-Proto https;
+      proxy_redirect off;
     }
   }
   server {
     listen *:{{KEEPWEBDLSSLPORT}} ssl default_server;
-    server_name ~.*;
+    server_name keep-web-dl ~.*;
     ssl_certificate "{{SSLCERT}}";
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://keep-web;
-      proxy_set_header Host download:{{KEEPWEBPORT}};
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-      proxy_redirect //download:{{KEEPWEBPORT}}/ https://$host:{{KEEPWEBDLSSLPORT}}/;
+      proxy_set_header X-Forwarded-Proto https;
+
+      # Unlike other proxy sections, here we need to override the
+      # requested Host header and use proxy_redirect because of the
+      # way the test suite orchestrates services. Keep-web's "download
+      # only" behavior relies on the Host header matching a configured
+      # value, but when run_test_servers.py writes keep-web's command
+      # line, the keep-web-dl TLS port (which clients will connect to
+      # and include in their Host header) has not yet been assigned.
+      #
+      # In production, "proxy_set_header Host $http_host;
+      # proxy_redirect off;" works: keep-web's redirect URLs will
+      # match the request URL received by Nginx.
+      #
+      # Here, keep-web will issue redirects to https://download/ and
+      # Nginx will rewrite them.
+      #
+      proxy_set_header Host  download;
+      proxy_redirect https://download/ https://$host:{{KEEPWEBDLSSLPORT}}/;
     }
   }
   upstream ws {
@@ -64,15 +94,17 @@ http {
   }
   server {
     listen *:{{WSSPORT}} ssl default_server;
-    server_name ~^(?<request_host>.*)$;
+    server_name websocket;
     ssl_certificate "{{SSLCERT}}";
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://ws;
       proxy_set_header Upgrade $http_upgrade;
       proxy_set_header Connection "upgrade";
-      proxy_set_header Host $request_host:{{WSPORT}};
+      proxy_set_header Host $http_host;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+      proxy_set_header X-Forwarded-Proto https;
+      proxy_redirect off;
     }
   }
   upstream controller {
@@ -80,12 +112,15 @@ http {
   }
   server {
     listen *:{{CONTROLLERSSLPORT}} ssl default_server;
-    server_name _;
+    server_name controller;
     ssl_certificate "{{SSLCERT}}";
     ssl_certificate_key "{{SSLKEY}}";
     location  / {
       proxy_pass http://controller;
+      proxy_set_header Host $http_host;
       proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+      proxy_set_header X-Forwarded-Proto https;
+      proxy_redirect off;
     }
   }
 }
index f7ca6daf6f65c190ccd22a1a04ad3cb1996f3e75..102433cd4186fbf392d8f2fc56af804bdec4d890 100644 (file)
@@ -174,7 +174,7 @@ def find_available_port():
     sock.close()
     return port
 
-def _wait_until_port_listens(port, timeout=10):
+def _wait_until_port_listens(port, timeout=10, warn=True):
     """Wait for a process to start listening on the given port.
 
     If nothing listens on the port within the specified timeout (given
@@ -196,11 +196,13 @@ def _wait_until_port_listens(port, timeout=10):
         except subprocess.CalledProcessError:
             time.sleep(0.1)
             continue
-        return
-    print(
-        "WARNING: Nothing is listening on port {} (waited {} seconds).".
-        format(port, timeout),
-        file=sys.stderr)
+        return True
+    if warn:
+        print(
+            "WARNING: Nothing is listening on port {} (waited {} seconds).".
+            format(port, timeout),
+            file=sys.stderr)
+    return False
 
 def _logfilename(label):
     """Set up a labelled log file, and return a path to write logs to.
@@ -375,8 +377,11 @@ def reset():
         'POST',
         headers={'Authorization': 'OAuth2 {}'.format(token)})
     os.environ['ARVADOS_API_HOST_INSECURE'] = 'true'
-    os.environ['ARVADOS_API_HOST'] = existing_api_host
     os.environ['ARVADOS_API_TOKEN'] = token
+    if _wait_until_port_listens(_getport('controller-ssl'), timeout=0.5, warn=False):
+        os.environ['ARVADOS_API_HOST'] = '0.0.0.0:'+str(_getport('controller-ssl'))
+    else:
+        os.environ['ARVADOS_API_HOST'] = existing_api_host
 
 def stop(force=False):
     """Stop the API server, if one is running.
@@ -634,7 +639,7 @@ def run_keep_web():
     keepweb = subprocess.Popen(
         ['keep-web',
          '-allow-anonymous',
-         '-attachment-only-host=download:'+str(keepwebport),
+         '-attachment-only-host=download',
          '-listen=:'+str(keepwebport)],
         env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
     with open(_pidfile('keep-web'), 'w') as f:
index 872c93bae25b5480de1cbf91400f716543415700..a7b79933bbc2999381fea887ac3a70e77f346b3c 100644 (file)
@@ -319,6 +319,29 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.assertEqual('100::1', service.hostname)
         self.assertEqual(10, service.port)
 
+    def test_insecure_disables_tls_verify(self):
+        api_client = self.mock_keep_services(count=1)
+        force_timeout = socket.timeout("timed out")
+
+        api_client.insecure = True
+        with tutil.mock_keep_responses(b'foo', 200) as mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3')
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.SSL_VERIFYPEER),
+                0)
+
+        api_client.insecure = False
+        with tutil.mock_keep_responses(b'foo', 200) as mock:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.get('acbd18db4cc2f85cedef654fccc4a4d8+3')
+            # getopt()==None here means we didn't change the
+            # default. If we were using real pycurl instead of a mock,
+            # it would return the default value 1.
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.SSL_VERIFYPEER),
+                None)
+
     # test_*_timeout verify that KeepClient instructs pycurl to use
     # the appropriate connection and read timeouts. They don't care
     # whether pycurl actually exhibits the expected timeout behavior
@@ -1257,6 +1280,8 @@ class KeepClientAPIErrorTest(unittest.TestCase):
             def __getattr__(self, r):
                 if r == "api_token":
                     return "abc"
+                elif r == "insecure":
+                    return False
                 else:
                     raise arvados.errors.KeepReadError()
         keep_client = arvados.KeepClient(api_client=ApiMock(),
index 7d4d4bba176591a97e2d166c6e291e4588dbe99e..135b8d1d07e1fc6332f1320f614744eef4054dcd 100644 (file)
@@ -32,7 +32,6 @@ gem 'pg', '~> 0.18'
 
 gem 'multi_json'
 gem 'oj'
-gem 'oj_mimic_json'
 
 # for building assets
 gem 'sass-rails',   '~> 4.0'
@@ -62,7 +61,6 @@ gem 'faye-websocket'
 gem 'themes_for_rails', git: 'https://github.com/curoverse/themes_for_rails'
 
 gem 'arvados', '>= 0.1.20150615153458'
-gem 'arvados-cli', '>= 0.1.20161017193526'
 gem 'httpclient'
 
 gem 'sshkey'
index 6aaaea77019ea6654e5a97ff3c7b1bd232bd00a7..1514ce1342064ce9698f3905047255692a3e1a5e 100644 (file)
@@ -58,15 +58,6 @@ GEM
       i18n (~> 0)
       json (>= 1.7.7, < 3)
       jwt (>= 0.1.5, < 2)
-    arvados-cli (1.1.4.20180412190507)
-      activesupport (>= 3.2.13, < 5)
-      andand (~> 1.3, >= 1.3.3)
-      arvados (~> 0.1, >= 0.1.20150128223554)
-      curb (~> 0.8)
-      google-api-client (~> 0.6, >= 0.6.3, < 0.8.9)
-      json (>= 1.7.7, < 3)
-      oj (~> 2.0, >= 2.0.3)
-      trollop (~> 2.0)
     autoparse (0.3.3)
       addressable (>= 2.3.1)
       extlib (>= 0.9.15)
@@ -87,7 +78,6 @@ GEM
     coffee-script-source (1.12.2)
     concurrent-ruby (1.0.5)
     crass (1.0.4)
-    curb (0.9.4)
     database_cleaner (1.7.0)
     erubis (2.7.0)
     eventmachine (1.2.6)
@@ -179,8 +169,7 @@ GEM
       multi_json (~> 1.3)
       multi_xml (~> 0.5)
       rack (>= 1.2, < 3)
-    oj (2.18.5)
-    oj_mimic_json (1.0.1)
+    oj (3.6.4)
     omniauth (1.4.3)
       hashie (>= 1.2, < 4)
       rack (>= 1.6.2, < 3)
@@ -254,7 +243,7 @@ GEM
     simplecov-html (0.7.1)
     simplecov-rcov (0.2.3)
       simplecov (>= 0.4.1)
-    sprockets (2.12.4)
+    sprockets (2.12.5)
       hike (~> 1.2)
       multi_json (~> 1.0)
       rack (~> 1.0)
@@ -292,7 +281,6 @@ DEPENDENCIES
   acts_as_api
   andand
   arvados (>= 0.1.20150615153458)
-  arvados-cli (>= 0.1.20161017193526)
   coffee-rails (~> 4.0)
   database_cleaner
   factory_girl_rails
@@ -304,7 +292,6 @@ DEPENDENCIES
   mocha
   multi_json
   oj
-  oj_mimic_json
   omniauth (~> 1.4.0)
   omniauth-oauth2 (~> 1.1)
   passenger
@@ -328,4 +315,4 @@ DEPENDENCIES
   uglifier (~> 2.0)
 
 BUNDLED WITH
-   1.16.1
+   1.16.2
index adac9960c41a06fff4da68da67e87a0ebf6facd6..49fc398e14bc86232ec8f791ffa0d986a376c48a 100644 (file)
@@ -33,6 +33,8 @@ class Arvados::V1::SchemaController < ApplicationController
         version: "v1",
         revision: "20131114",
         source_version: AppVersion.hash,
+        sourceVersion: AppVersion.hash, # source_version should be deprecated in the future
+        packageVersion: AppVersion.package_version,
         generatedAt: db_current_time.iso8601,
         title: "Arvados API",
         description: "The API to interact with Arvados.",
index b267a63882d4a5b9f23853d99b9afeebae8f397e..8ea9f7bd885a396541b2e1db9f6c9c55688ba870 100644 (file)
@@ -161,7 +161,8 @@ class ApiClientAuthorization < ArvadosModel
           end
         end
 
-        if Rails.configuration.new_users_are_active
+        if Rails.configuration.new_users_are_active ||
+           Rails.configuration.auto_activate_users_from.include?(remote_user['uuid'][0..4])
           # Update is_active to whatever it is at the remote end
           user.is_active = remote_user['is_active']
         elsif !remote_user['is_active']
index 799aa430fbe46906b7e0768700e8fc3f0d8fe3f7..dd3ff767dd4c8f86b523add765afe2f3516fba5d 100644 (file)
@@ -28,8 +28,8 @@ class ContainerRequest < ArvadosModel
 
   before_validation :fill_field_defaults, :if => :new_record?
   before_validation :validate_runtime_constraints
-  before_validation :set_container
   before_validation :set_default_preemptible_scheduling_parameter
+  before_validation :set_container
   validates :command, :container_image, :output_path, :cwd, :presence => true
   validates :output_ttl, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
   validates :priority, numericality: { only_integer: true, greater_than_or_equal_to: 0, less_than_or_equal_to: 1000 }
@@ -199,11 +199,11 @@ class ContainerRequest < ArvadosModel
   end
 
   def set_default_preemptible_scheduling_parameter
+    c = get_requesting_container()
     if self.state == Committed
       # If preemptible instances (eg: AWS Spot Instances) are allowed,
       # ask them on child containers by default.
-      if Rails.configuration.preemptible_instances and
-        !self.requesting_container_uuid.nil? and
+      if Rails.configuration.preemptible_instances and !c.nil? and
         self.scheduling_parameters['preemptible'].nil?
           self.scheduling_parameters['preemptible'] = true
       end
@@ -239,6 +239,11 @@ class ContainerRequest < ArvadosModel
       if !Rails.configuration.preemptible_instances and scheduling_parameters['preemptible']
         errors.add :scheduling_parameters, "preemptible instances are not allowed"
       end
+      if scheduling_parameters.include? 'max_run_time' and
+        (!scheduling_parameters['max_run_time'].is_a?(Integer) ||
+          scheduling_parameters['max_run_time'] < 0)
+          errors.add :scheduling_parameters, "max_run_time must be positive integer"
+      end
     end
   end
 
@@ -313,10 +318,18 @@ class ContainerRequest < ArvadosModel
   end
 
   def set_requesting_container_uuid
-    return if !current_api_client_authorization
-    if (c = Container.where('auth_uuid=?', current_api_client_authorization.uuid).select([:uuid, :priority]).first)
+    c = get_requesting_container()
+    if !c.nil?
       self.requesting_container_uuid = c.uuid
       self.priority = c.priority>0 ? 1 : 0
     end
   end
+
+  def get_requesting_container
+    return self.requesting_container_uuid if !self.requesting_container_uuid.nil?
+    return if !current_api_client_authorization
+    if (c = Container.where('auth_uuid=?', current_api_client_authorization.uuid).select([:uuid, :priority]).first)
+      return c
+    end
+  end
 end
index 9d4c20af9faaa1ff7076fdcd0bd8d0348324e4ef..cc3a22cbf0d75f93563bfb375d1306141e958a26 100644 (file)
@@ -30,6 +30,7 @@ class User < ArvadosModel
   before_create :set_initial_username, :if => Proc.new { |user|
     user.username.nil? and user.email
   }
+  after_create :setup_on_activate
   after_create :add_system_group_permission_link
   after_create :invalidate_permissions_cache
   after_create :auto_setup_new_user, :if => Proc.new { |user|
@@ -463,7 +464,7 @@ class User < ArvadosModel
 
     if !oid_login_perms.any?
       # create openid login permission
-      oid_login_perm = Link.create(link_class: 'permission',
+      oid_login_perm = Link.create!(link_class: 'permission',
                                    name: 'can_login',
                                    tail_uuid: self.email,
                                    head_uuid: self.uuid,
index f51679135d0b462beb9211107c8e742f505806e9..f976a83ca96bf4cffb562cd74f3ccaf27590198f 100644 (file)
@@ -117,7 +117,11 @@ common:
   ### New user and & email settings
   ###
 
-  # Config parameters to automatically setup new users.
+  # Config parameters to automatically set up new users.  If enabled,
+  # these users will be able to self-activate.  Enable this if you want
+  # to run an open instance where anyone can create an account and use
+  # the system without requiring manual approval.
+  #
   # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
   # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
   auto_setup_new_users: false
@@ -125,7 +129,9 @@ common:
   auto_setup_new_users_with_repository: false
   auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
 
-  # When new_users_are_active is set to true, the user agreement check is skipped.
+  # When new_users_are_active is set to true, new users will be active
+  # immediately.  This skips the "self-activate" step which enforces
+  # user agreements.  Should only be enabled for development.
   new_users_are_active: false
 
   # The e-mail address of the user you would like to become marked as an admin
@@ -409,6 +415,12 @@ common:
   # remote_hosts above.
   remote_hosts_via_dns: false
 
+  # List of cluster prefixes.  These are "trusted" clusters; users
+  # from the clusters listed here will be automatically set up and
+  # activated.  This is separate from the settings
+  # auto_setup_new_users and new_users_are_active.
+  auto_activate_users_from: []
+
   ###
   ### Remaining assorted configuration options.
   ###
@@ -454,6 +466,11 @@ common:
   # "git log".
   source_version: false
 
+  # Override the automatic package version string. With the default version of
+  # false, the package version is read from package-build.version in Rails.root
+  # (included in vendor packages).
+  package_version: false
+
   # Enable asynchronous permission graph rebuild.  Must run
   # script/permission-updater.rb as a separate process.  When the permission
   # cache is invalidated, the background process will update the permission
diff --git a/services/api/config/initializers/oj_mimic_json.rb b/services/api/config/initializers/oj_mimic_json.rb
new file mode 100644 (file)
index 0000000..ce2d40c
--- /dev/null
@@ -0,0 +1,11 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require 'oj'
+
+Oj::Rails.set_encoder()
+Oj::Rails.set_decoder()
+Oj::Rails.optimize()
+Oj::Rails.mimic_JSON()
+
index 3d690930ae18a1a4d1956f0872b37eec77d9d228..78cabc87ac7cd5f66a07becb1207be53ff6e2af3 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+ActiveSupport::JSON::Encoding.time_precision = 9
+
 class ActiveSupport::TimeWithZone
   remove_method :as_json
   def as_json *args
index abcf40ee3666622d9b2c0209531cd733c730f783..335608b2b6611eaac1eba516219d457f549c6862 100644 (file)
@@ -15,6 +15,7 @@ class AppVersion
 
   def self.forget
     @hash = nil
+    @package_version = nil
   end
 
   # Return abbrev commit hash for current code version: "abc1234", or
@@ -53,4 +54,18 @@ class AppVersion
 
     @hash || "unknown"
   end
+
+  def self.package_version
+    if (cached = Rails.configuration.package_version || @package_version)
+      return cached
+    end
+
+    begin
+      @package_version = IO.read(Rails.root.join("package-build.version")).strip
+    rescue Errno::ENOENT
+      @package_version = "unknown"
+    end
+
+    @package_version
+  end
 end
index 3cabc1e3ce75842d6e187a7f99ab6a12dd510d84..73ad7606cc879ef58f7569c960196191c7fb7721 100644 (file)
@@ -297,7 +297,7 @@ class CrunchDispatch
     @fetched_commits[sha1] = ($? == 0)
   end
 
-  def tag_commit(commit_hash, tag_name)
+  def tag_commit(job, commit_hash, tag_name)
     # @git_tags[T]==V if we know commit V has been tagged T in the
     # arvados_internal repository.
     if not @git_tags[tag_name]
@@ -381,20 +381,20 @@ class CrunchDispatch
           next
         end
         ready &&= get_commit repo.server_path, job.script_version
-        ready &&= tag_commit job.script_version, job.uuid
+        ready &&= tag_commit job, job.script_version, job.uuid
       end
 
       # This should be unnecessary, because API server does it during
       # job create/update, but it's still not a bad idea to verify the
       # tag is correct before starting the job:
-      ready &&= tag_commit job.script_version, job.uuid
+      ready &&= tag_commit job, job.script_version, job.uuid
 
       # The arvados_sdk_version doesn't support use of arbitrary
       # remote URLs, so the requested version isn't necessarily copied
       # into the internal repository yet.
       if job.arvados_sdk_version
         ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
-        ready &&= tag_commit job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
+        ready &&= tag_commit job, job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
       end
 
       if not ready
index f4da283d746fcbecba6a03ce54fa041f0e3f58de..f78a3d34dc5d00b2d47ac8b4f9634d319c5462bc 100644 (file)
@@ -7,6 +7,12 @@ class SafeJSON
     return Oj.dump(o, mode: :compat)
   end
   def self.load(s)
+    if s.nil? or s == ''
+      # Oj 2.18.5 used to return nil. Not anymore on 3.6.4.
+      # Upgraded for performance issues (see #13803 and
+      # https://github.com/ohler55/oj/issues/441)
+      return nil
+    end
     Oj.strict_load(s, symbol_keys: false)
   end
 end
index f25d4238106697002a692c552e0b300b4d90067a..17aed4b48dba66b079431007408dae49ee6442cf 100644 (file)
@@ -6,7 +6,7 @@ module WhitelistUpdate
   def check_update_whitelist permitted_fields
     attribute_names.each do |field|
       if !permitted_fields.include?(field.to_sym) && really_changed(field)
-        errors.add field, "cannot be modified in this state (#{send(field+"_was").inspect}, #{send(field).inspect})"
+        errors.add field, "cannot be modified in state '#{self.state}' (#{send(field+"_was").inspect}, #{send(field).inspect})"
       end
     end
   end
index c15060d1a9847cf33f774399b6decf7ff8f96b45..40868c87b8857ba34cd5aff2bfbf065506d50b00 100644 (file)
@@ -31,17 +31,29 @@ class Arvados::V1::SchemaControllerTest < ActionController::TestCase
     assert_includes discovery_doc, 'defaultTrashLifetime'
     assert_equal discovery_doc['defaultTrashLifetime'], Rails.application.config.default_trash_lifetime
     assert_match(/^[0-9a-f]+(-modified)?$/, discovery_doc['source_version'])
+    assert_match(/^[0-9a-f]+(-modified)?$/, discovery_doc['sourceVersion'])
+    assert_match(/^unknown$/, discovery_doc['packageVersion'])
     assert_equal discovery_doc['websocketUrl'], Rails.application.config.websocket_address
     assert_equal discovery_doc['workbenchUrl'], Rails.application.config.workbench_address
     assert_equal('zzzzz', discovery_doc['uuidPrefix'])
   end
 
-  test "discovery document overrides source_version with config" do
+  test "discovery document overrides source_version & sourceVersion with config" do
     Rails.configuration.source_version = 'aaa888fff'
     get :index
     assert_response :success
     discovery_doc = JSON.parse(@response.body)
+    # Key source_version will be replaced with sourceVersion
     assert_equal 'aaa888fff', discovery_doc['source_version']
+    assert_equal 'aaa888fff', discovery_doc['sourceVersion']
+  end
+
+  test "discovery document overrides packageVersion with config" do
+    Rails.configuration.package_version = '1.0.0-stable'
+    get :index
+    assert_response :success
+    discovery_doc = JSON.parse(@response.body)
+    assert_equal '1.0.0-stable', discovery_doc['packageVersion']
   end
 
   test "empty disable_api_methods" do
index 6d7f4a0616e4068956c050b3db84f504b2e34ef3..c38c230b2276609c6ce21ccf581f4e710854167d 100644 (file)
@@ -85,6 +85,7 @@ class RemoteUsersTest < ActionDispatch::IntegrationTest
     assert_response :success
     assert_equal 'zbbbb-tpzed-000000000000000', json_response['uuid']
     assert_equal false, json_response['is_admin']
+    assert_equal false, json_response['is_active']
     assert_equal 'foo@example.com', json_response['email']
     assert_equal 'barney', json_response['username']
 
@@ -218,4 +219,36 @@ class RemoteUsersTest < ActionDispatch::IntegrationTest
     refute_includes(group_uuids, groups(:trashed_project).uuid)
     refute_includes(group_uuids, groups(:testusergroup_admins).uuid)
   end
+
+  test 'auto-activate user from trusted cluster' do
+    Rails.configuration.auto_activate_users_from = ['zbbbb']
+    get '/arvados/v1/users/current', {format: 'json'}, auth(remote: 'zbbbb')
+    assert_response :success
+    assert_equal 'zbbbb-tpzed-000000000000000', json_response['uuid']
+    assert_equal false, json_response['is_admin']
+    assert_equal true, json_response['is_active']
+    assert_equal 'foo@example.com', json_response['email']
+    assert_equal 'barney', json_response['username']
+  end
+
+  test 'pre-activate remote user' do
+    post '/arvados/v1/users', {
+           "user" => {
+             "uuid" => "zbbbb-tpzed-000000000000000",
+             "email" => 'foo@example.com',
+             "username" => 'barney',
+             "is_active" => true
+           }
+    }, {'HTTP_AUTHORIZATION' => "OAuth2 #{api_token(:admin)}"}
+    assert_response :success
+
+    get '/arvados/v1/users/current', {format: 'json'}, auth(remote: 'zbbbb')
+    assert_response :success
+    assert_equal 'zbbbb-tpzed-000000000000000', json_response['uuid']
+    assert_equal nil, json_response['is_admin']
+    assert_equal true, json_response['is_active']
+    assert_equal 'foo@example.com', json_response['email']
+    assert_equal 'barney', json_response['username']
+  end
+
 end
index c834250cb6caa89c28ff25ad942978dd14399949..6dbaa7550f55a8e49b035e6092c331304c6e4edb 100644 (file)
@@ -143,7 +143,7 @@ class ActiveSupport::TestCase
   end
 
   def self.slow_test(name, &block)
-    define_method(name, block) unless skip_slow_tests?
+    test(name, &block) unless skip_slow_tests?
   end
 end
 
index 8071e05cebe92669e3c240d5697f80ab55998633..f266c096b475ca6306c9086d22029bdc6e22cb3e 100644 (file)
@@ -855,6 +855,11 @@ class ContainerRequestTest < ActiveSupport::TestCase
     [{"partitions" => "fastcpu"}, ContainerRequest::Committed, ActiveRecord::RecordInvalid],
     [{"partitions" => "fastcpu"}, ContainerRequest::Uncommitted],
     [{"partitions" => ["fastcpu","vfastcpu"]}, ContainerRequest::Committed],
+    [{"max_run_time" => "one day"}, ContainerRequest::Committed, ActiveRecord::RecordInvalid],
+    [{"max_run_time" => "one day"}, ContainerRequest::Uncommitted],
+    [{"max_run_time" => -1}, ContainerRequest::Committed, ActiveRecord::RecordInvalid],
+    [{"max_run_time" => -1}, ContainerRequest::Uncommitted],
+    [{"max_run_time" => 86400}, ContainerRequest::Committed],
   ].each do |sp, state, expected|
     test "create container request with scheduling_parameters #{sp} in state #{state} and verify #{expected}" do
       common_attrs = {cwd: "test",
@@ -881,6 +886,26 @@ class ContainerRequestTest < ActiveSupport::TestCase
     end
   end
 
+  test "Having preemptible_instances=true create a committed child container request and verify the scheduling parameter of its container" do
+    common_attrs = {cwd: "test",
+                    priority: 1,
+                    command: ["echo", "hello"],
+                    output_path: "test",
+                    state: ContainerRequest::Committed,
+                    mounts: {"test" => {"kind" => "json"}}}
+    set_user_from_auth :active
+    Rails.configuration.preemptible_instances = true
+
+    cr = with_container_auth(Container.find_by_uuid 'zzzzz-dz642-runningcontainr') do
+      create_minimal_req!(common_attrs)
+    end
+    assert_equal 'zzzzz-dz642-runningcontainr', cr.requesting_container_uuid
+    assert_equal true, cr.scheduling_parameters["preemptible"]
+
+    c = Container.find_by_uuid(cr.container_uuid)
+    assert_equal true, c.scheduling_parameters["preemptible"]
+  end
+
   [['Committed', true, {name: "foobar", priority: 123}],
    ['Committed', false, {container_count: 2}],
    ['Committed', false, {container_count: 0}],
index 72beca6c78134dbe92bd9ce4b65d8b3e70c6d530..67c410047cfb5e62ba65be801a46bd20b721971d 100644 (file)
@@ -643,11 +643,11 @@ class UserTest < ActiveSupport::TestCase
     assert_equal(expect_username, user.username)
 
     # check user setup
-    verify_link_exists(Rails.configuration.auto_setup_new_users,
+    verify_link_exists(Rails.configuration.auto_setup_new_users || active,
                        groups(:all_users).uuid, user.uuid,
                        "permission", "can_read")
     # Check for OID login link.
-    verify_link_exists(Rails.configuration.auto_setup_new_users,
+    verify_link_exists(Rails.configuration.auto_setup_new_users || active,
                        user.uuid, user.email, "permission", "can_login")
     # Check for repository.
     if named_repo = (prior_repo or
index b4033e78b00abee87e2fb7423281021be5233577..719ec98d27aa19d65eceb7d3db3a46f506aed2f0 100644 (file)
@@ -55,11 +55,12 @@ func (s *IntegrationSuite) TearDownTest(c *C) {
 }
 
 type slurmFake struct {
-       didBatch   [][]string
-       didCancel  []string
-       didRelease []string
-       didRenice  [][]string
-       queue      string
+       didBatch      [][]string
+       didCancel     []string
+       didRelease    []string
+       didRenice     [][]string
+       queue         string
+       rejectNice10K bool
        // If non-nil, run this func during the 2nd+ call to Cancel()
        onCancel func()
        // Error returned by Batch()
@@ -82,6 +83,9 @@ func (sf *slurmFake) Release(name string) error {
 
 func (sf *slurmFake) Renice(name string, nice int64) error {
        sf.didRenice = append(sf.didRenice, []string{name, fmt.Sprintf("%d", nice)})
+       if sf.rejectNice10K && nice > 10000 {
+               return errors.New("scontrol: error: Invalid nice value, must be between -10000 and 10000")
+       }
        return nil
 }
 
@@ -367,17 +371,17 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) {
        }
 
        for _, trial := range []struct {
-               types      []arvados.InstanceType
+               types      map[string]arvados.InstanceType
                sbatchArgs []string
                err        error
        }{
                // Choose node type => use --constraint arg
                {
-                       types: []arvados.InstanceType{
-                               {Name: "a1.tiny", Price: 0.02, RAM: 128000000, VCPUs: 1},
-                               {Name: "a1.small", Price: 0.04, RAM: 256000000, VCPUs: 2},
-                               {Name: "a1.medium", Price: 0.08, RAM: 512000000, VCPUs: 4},
-                               {Name: "a1.large", Price: 0.16, RAM: 1024000000, VCPUs: 8},
+                       types: map[string]arvados.InstanceType{
+                               "a1.tiny":   {Name: "a1.tiny", Price: 0.02, RAM: 128000000, VCPUs: 1},
+                               "a1.small":  {Name: "a1.small", Price: 0.04, RAM: 256000000, VCPUs: 2},
+                               "a1.medium": {Name: "a1.medium", Price: 0.08, RAM: 512000000, VCPUs: 4},
+                               "a1.large":  {Name: "a1.large", Price: 0.16, RAM: 1024000000, VCPUs: 8},
                        },
                        sbatchArgs: []string{"--constraint=instancetype=a1.medium"},
                },
@@ -388,8 +392,8 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) {
                },
                // No node type is big enough => error
                {
-                       types: []arvados.InstanceType{
-                               {Name: "a1.tiny", Price: 0.02, RAM: 128000000, VCPUs: 1},
+                       types: map[string]arvados.InstanceType{
+                               "a1.tiny": {Name: "a1.tiny", Price: 0.02, RAM: 128000000, VCPUs: 1},
                        },
                        err: dispatchcloud.ConstraintsNotSatisfiableError{},
                },
index 742943f197580e186e7fd1f7b8084a1357f3661d..fd4851eb0a8a92b48fcacef0e4552ce99d0a7f48 100644 (file)
@@ -14,11 +14,14 @@ import (
        "time"
 )
 
+const slurm15NiceLimit int64 = 10000
+
 type slurmJob struct {
        uuid         string
        wantPriority int64
        priority     int64 // current slurm priority (incorporates nice value)
        nice         int64 // current slurm nice value
+       hitNiceLimit bool
 }
 
 // Squeue implements asynchronous polling monitor of the SLURM queue using the
@@ -103,10 +106,18 @@ func (sqc *SqueueChecker) reniceAll() {
        })
        renice := wantNice(jobs, sqc.PrioritySpread)
        for i, job := range jobs {
-               if renice[i] == job.nice {
+               niceNew := renice[i]
+               if job.hitNiceLimit && niceNew > slurm15NiceLimit {
+                       niceNew = slurm15NiceLimit
+               }
+               if niceNew == job.nice {
                        continue
                }
-               sqc.Slurm.Renice(job.uuid, renice[i])
+               err := sqc.Slurm.Renice(job.uuid, niceNew)
+               if err != nil && niceNew > slurm15NiceLimit && strings.Contains(err.Error(), "Invalid nice value") {
+                       log.Printf("container %q clamping nice values at %d, priority order will not be correct -- see https://dev.arvados.org/projects/arvados/wiki/SLURM_integration#Limited-nice-values-SLURM-15", job.uuid, slurm15NiceLimit)
+                       job.hitNiceLimit = true
+               }
        }
 }
 
@@ -157,14 +168,17 @@ func (sqc *SqueueChecker) check() {
                replacing.nice = n
                newq[uuid] = replacing
 
-               if state == "PENDING" && ((reason == "BadConstraints" && p == 0) || reason == "launch failed requeued held") && replacing.wantPriority > 0 {
+               if state == "PENDING" && ((reason == "BadConstraints" && p <= 2*slurm15NiceLimit) || reason == "launch failed requeued held") && replacing.wantPriority > 0 {
                        // When using SLURM 14.x or 15.x, our queued
                        // jobs land in this state when "scontrol
                        // reconfigure" invalidates their feature
                        // constraints by clearing all node features.
                        // They stay in this state even after the
                        // features reappear, until we run "scontrol
-                       // release {jobid}".
+                       // release {jobid}". Priority is usually 0 in
+                       // this state, but sometimes (due to a race
+                       // with nice adjustments?) it's a small
+                       // positive value.
                        //
                        // "scontrol release" is silent and successful
                        // regardless of whether the features have
@@ -175,7 +189,7 @@ func (sqc *SqueueChecker) check() {
                        // "launch failed requeued held" seems to be
                        // another manifestation of this problem,
                        // resolved the same way.
-                       log.Printf("releasing held job %q", uuid)
+                       log.Printf("releasing held job %q (priority=%d, state=%q, reason=%q)", uuid, p, state, reason)
                        sqc.Slurm.Release(uuid)
                } else if p < 1<<20 && replacing.wantPriority > 0 {
                        log.Printf("warning: job %q has low priority %d, nice %d, state %q, reason %q", uuid, p, n, state, reason)
index c9329fdf95bf87028346fb727b8521dc8edfa1cd..ef036dabd781edd425b29fc28f847ae18370d700 100644 (file)
@@ -103,6 +103,50 @@ func (s *SqueueSuite) TestReniceAll(c *C) {
        }
 }
 
+// If a limited nice range prevents desired priority adjustments, give
+// up and clamp nice to 10K.
+func (s *SqueueSuite) TestReniceInvalidNiceValue(c *C) {
+       uuids := []string{"zzzzz-dz642-fake0fake0fake0", "zzzzz-dz642-fake1fake1fake1", "zzzzz-dz642-fake2fake2fake2"}
+       slurm := &slurmFake{
+               queue:         uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 0 4294555222 PENDING Resources\n",
+               rejectNice10K: true,
+       }
+       sqc := &SqueueChecker{
+               Slurm:          slurm,
+               PrioritySpread: 1,
+               Period:         time.Hour,
+       }
+       sqc.startOnce.Do(sqc.start)
+       sqc.check()
+       sqc.SetPriority(uuids[0], 2)
+       sqc.SetPriority(uuids[1], 1)
+
+       // First attempt should renice to 555001, which will fail
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}})
+
+       // Next attempt should renice to 10K, which will succeed
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
+       // ...so we'll change the squeue response to reflect the
+       // updated priority+nice, and make sure sqc sees that...
+       slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294545222 PENDING Resources\n"
+       sqc.check()
+
+       // Next attempt should leave nice alone because it's already
+       // at the 10K limit
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}})
+
+       // Back to normal if desired nice value falls below 10K
+       slurm.queue = uuids[0] + " 0 4294000222 PENDING Resources\n" + uuids[1] + " 10000 4294000111 PENDING Resources\n"
+       sqc.check()
+       sqc.reniceAll()
+       c.Check(slurm.didRenice, DeepEquals, [][]string{{uuids[1], "555001"}, {uuids[1], "10000"}, {uuids[1], "9890"}})
+
+       sqc.Stop()
+}
+
 // If the given UUID isn't in the slurm queue yet, SetPriority()
 // should wait for it to appear on the very next poll, then give up.
 func (s *SqueueSuite) TestSetPriorityBeforeQueued(c *C) {
index 2f9ccf52460a667215cdfb9156b7df56605712a5..098c53f8a6a587816703ad6997ceb51eec7f0232 100644 (file)
@@ -32,6 +32,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
+       "github.com/shirou/gopsutil/process"
        "golang.org/x/net/context"
 
        dockertypes "github.com/docker/docker/api/types"
@@ -83,6 +84,10 @@ type ThinDockerClient interface {
        ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error)
 }
 
+type PsProcess interface {
+       CmdlineSlice() ([]string, error)
+}
+
 // ContainerRunner is the main stateful struct used for a single execution of a
 // container.
 type ContainerRunner struct {
@@ -118,6 +123,8 @@ type ContainerRunner struct {
        finalState    string
        parentTemp    string
 
+       ListProcesses func() ([]PsProcess, error)
+
        statLogger       io.WriteCloser
        statReporter     *crunchstat.Reporter
        hoststatLogger   io.WriteCloser
@@ -141,9 +148,10 @@ type ContainerRunner struct {
        cStateLock sync.Mutex
        cCancelled bool // StopContainer() invoked
 
-       enableNetwork string // one of "default" or "always"
-       networkMode   string // passed through to HostConfig.NetworkMode
-       arvMountLog   *ThrottledLogger
+       enableNetwork   string // one of "default" or "always"
+       networkMode     string // passed through to HostConfig.NetworkMode
+       arvMountLog     *ThrottledLogger
+       checkContainerd time.Duration
 }
 
 // setupSignals sets up signal handling to gracefully terminate the underlying
@@ -182,26 +190,31 @@ func (runner *ContainerRunner) stop(sig os.Signal) {
 var errorBlacklist = []string{
        "(?ms).*[Cc]annot connect to the Docker daemon.*",
        "(?ms).*oci runtime error.*starting container process.*container init.*mounting.*to rootfs.*no such file or directory.*",
+       "(?ms).*grpc: the connection is unavailable.*",
 }
 var brokenNodeHook *string = flag.String("broken-node-hook", "", "Script to run if node is detected to be broken (for example, Docker daemon is not running)")
 
+func (runner *ContainerRunner) runBrokenNodeHook() {
+       if *brokenNodeHook == "" {
+               runner.CrunchLog.Printf("No broken node hook provided, cannot mark node as broken.")
+       } else {
+               runner.CrunchLog.Printf("Running broken node hook %q", *brokenNodeHook)
+               // run killme script
+               c := exec.Command(*brokenNodeHook)
+               c.Stdout = runner.CrunchLog
+               c.Stderr = runner.CrunchLog
+               err := c.Run()
+               if err != nil {
+                       runner.CrunchLog.Printf("Error running broken node hook: %v", err)
+               }
+       }
+}
+
 func (runner *ContainerRunner) checkBrokenNode(goterr error) bool {
        for _, d := range errorBlacklist {
                if m, e := regexp.MatchString(d, goterr.Error()); m && e == nil {
                        runner.CrunchLog.Printf("Error suggests node is unable to run containers: %v", goterr)
-                       if *brokenNodeHook == "" {
-                               runner.CrunchLog.Printf("No broken node hook provided, cannot mark node as broken.")
-                       } else {
-                               runner.CrunchLog.Printf("Running broken node hook %q", *brokenNodeHook)
-                               // run killme script
-                               c := exec.Command(*brokenNodeHook)
-                               c.Stdout = runner.CrunchLog
-                               c.Stderr = runner.CrunchLog
-                               err := c.Run()
-                               if err != nil {
-                                       runner.CrunchLog.Printf("Error running broken node hook: %v", err)
-                               }
-                       }
+                       runner.runBrokenNodeHook()
                        return true
                }
        }
@@ -1001,6 +1014,10 @@ func (runner *ContainerRunner) CreateContainer() error {
        runner.ContainerConfig.Volumes = runner.Volumes
 
        maxRAM := int64(runner.Container.RuntimeConstraints.RAM)
+       if maxRAM < 4*1024*1024 {
+               // Docker daemon won't let you set a limit less than 4 MiB
+               maxRAM = 4 * 1024 * 1024
+       }
        runner.HostConfig = dockercontainer.HostConfig{
                Binds: runner.Binds,
                LogConfig: dockercontainer.LogConfig{
@@ -1071,13 +1088,60 @@ func (runner *ContainerRunner) StartContainer() error {
        return nil
 }
 
+// CheckContainerd checks if "containerd" is present in the process list.
+func (runner *ContainerRunner) CheckContainerd() error {
+       if runner.checkContainerd == 0 {
+               return nil
+       }
+       p, _ := runner.ListProcesses()
+       for _, i := range p {
+               e, _ := i.CmdlineSlice()
+               if len(e) > 0 {
+                       if strings.Index(e[0], "containerd") > -1 {
+                               return nil
+                       }
+               }
+       }
+
+       // Not found
+       runner.runBrokenNodeHook()
+       runner.stop(nil)
+       return fmt.Errorf("'containerd' not found in process list.")
+}
+
 // WaitFinish waits for the container to terminate, capture the exit code, and
 // close the stdout/stderr logging.
 func (runner *ContainerRunner) WaitFinish() error {
+       var runTimeExceeded <-chan time.Time
        runner.CrunchLog.Print("Waiting for container to finish")
 
        waitOk, waitErr := runner.Docker.ContainerWait(context.TODO(), runner.ContainerID, dockercontainer.WaitConditionNotRunning)
        arvMountExit := runner.ArvMountExit
+       if timeout := runner.Container.SchedulingParameters.MaxRunTime; timeout > 0 {
+               runTimeExceeded = time.After(time.Duration(timeout) * time.Second)
+       }
+
+       containerdGone := make(chan error)
+       defer close(containerdGone)
+       if runner.checkContainerd > 0 {
+               go func() {
+                       ticker := time.NewTicker(time.Duration(runner.checkContainerd))
+                       defer ticker.Stop()
+                       for {
+                               select {
+                               case <-ticker.C:
+                                       if ck := runner.CheckContainerd(); ck != nil {
+                                               containerdGone <- ck
+                                               return
+                                       }
+                               case <-containerdGone:
+                                       // Channel closed, quit goroutine
+                                       return
+                               }
+                       }
+               }()
+       }
+
        for {
                select {
                case waitBody := <-waitOk:
@@ -1098,6 +1162,14 @@ func (runner *ContainerRunner) WaitFinish() error {
                        // arvMountExit will always be ready now that
                        // it's closed, but that doesn't interest us.
                        arvMountExit = nil
+
+               case <-runTimeExceeded:
+                       runner.CrunchLog.Printf("maximum run time exceeded. Stopping container.")
+                       runner.stop(nil)
+                       runTimeExceeded = nil
+
+               case err := <-containerdGone:
+                       return err
                }
        }
 }
@@ -1399,6 +1471,12 @@ func (runner *ContainerRunner) Run() (err error) {
                return
        }
 
+       // Sanity check that containerd is running.
+       err = runner.CheckContainerd()
+       if err != nil {
+               return
+       }
+
        // check for and/or load image
        err = runner.LoadImage()
        if err != nil {
@@ -1518,6 +1596,17 @@ func NewContainerRunner(client *arvados.Client, api IArvadosClient, kc IKeepClie
        cr.NewLogWriter = cr.NewArvLogWriter
        cr.RunArvMount = cr.ArvMountCmd
        cr.MkTempDir = ioutil.TempDir
+       cr.ListProcesses = func() ([]PsProcess, error) {
+               pr, err := process.Processes()
+               if err != nil {
+                       return nil, err
+               }
+               ps := make([]PsProcess, len(pr))
+               for i, j := range pr {
+                       ps[i] = j
+               }
+               return ps, nil
+       }
        cr.MkArvClient = func(token string) (IArvadosClient, error) {
                cl, err := arvadosclient.MakeArvadosClient()
                if err != nil {
@@ -1560,6 +1649,7 @@ func main() {
        `)
        memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
        getVersion := flag.Bool("version", false, "Print version information and exit.")
+       checkContainerd := flag.Duration("check-containerd", 60*time.Second, "Periodic check if (docker-)containerd is running (use 0s to disable).")
        flag.Parse()
 
        // Print version information if requested
@@ -1615,6 +1705,7 @@ func main() {
        cr.expectCgroupParent = *cgroupParent
        cr.enableNetwork = *enableNetwork
        cr.networkMode = *networkMode
+       cr.checkContainerd = *checkContainerd
        if *cgroupParentSubsystem != "" {
                p := findCgroup(*cgroupParentSubsystem)
                cr.setCgroupParent = p
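WaitFinish above gains two extra select cases: a max_run_time deadline created with time.After, and a containerd watchdog channel fed by a ticker goroutine. The deadline channel stays nil when no limit is set, so that case can never fire. A simplified, hedged sketch of the deadline mechanics only, assuming a done channel and a stop callback (names invented for illustration):

    package main

    import (
        "fmt"
        "time"
    )

    // waitWithDeadline waits for done; if maxRunTime is positive, it calls
    // stop() once the deadline passes and then keeps waiting for done.
    // A nil channel blocks forever, so the deadline case is inert when
    // maxRunTime is zero.
    func waitWithDeadline(done <-chan struct{}, maxRunTime time.Duration, stop func()) {
        var runTimeExceeded <-chan time.Time
        if maxRunTime > 0 {
            runTimeExceeded = time.After(maxRunTime)
        }
        for {
            select {
            case <-done:
                return
            case <-runTimeExceeded:
                stop()                // request cancellation...
                runTimeExceeded = nil // ...and keep waiting for done
            }
        }
    }

    func main() {
        done := make(chan struct{})
        stop := func() {
            fmt.Println("maximum run time exceeded, stopping")
            close(done) // simulate the container exiting after being stopped
        }
        waitWithDeadline(done, 10*time.Millisecond, stop)
    }

In the real code the stop happens via runner.stop(nil) and the loop keeps draining Docker's wait channel; the sketch keeps only the nil-channel select pattern.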
index c76682f1c69be0297606f88ceaaa8b8aa260d71a..8d8e0400003a94dae160ee65a69ccd92f723c823 100644 (file)
@@ -793,7 +793,7 @@ func (s *TestSuite) TestFullRunHello(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+       "runtime_constraints": {}
 }`, nil, 0, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -805,6 +805,26 @@ func (s *TestSuite) TestFullRunHello(c *C) {
 
 }
 
+func (s *TestSuite) TestRunTimeExceeded(c *C) {
+       api, _, _ := s.fullRunHelper(c, `{
+    "command": ["sleep", "3"],
+    "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+    "cwd": ".",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+       "runtime_constraints": {},
+       "scheduling_parameters":{"max_run_time": 1}
+}`, nil, 0, func(t *TestDockerClient) {
+               time.Sleep(3 * time.Second)
+               t.logWriter.Close()
+       })
+
+       c.Check(api.CalledWith("container.state", "Cancelled"), NotNil)
+       c.Check(api.Logs["crunch-run"].String(), Matches, "(?ms).*maximum run time exceeded.*")
+}
+
 func (s *TestSuite) TestCrunchstat(c *C) {
        api, _, _ := s.fullRunHelper(c, `{
                "command": ["sleep", "1"],
@@ -2047,3 +2067,49 @@ func (s *TestSuite) TestSecretTextMountPoint(c *C) {
        c.Check(api.CalledWith("collection.manifest_text", ". 34819d7beeabb9260a5c854bc85b3e44+10 0:10:secret.conf\n"), IsNil)
        c.Check(api.CalledWith("collection.manifest_text", ""), NotNil)
 }
+
+type FakeProcess struct {
+       cmdLine []string
+}
+
+func (fp FakeProcess) CmdlineSlice() ([]string, error) {
+       return fp.cmdLine, nil
+}
+
+func (s *TestSuite) helpCheckContainerd(c *C, lp func() ([]PsProcess, error)) error {
+       kc := &KeepTestClient{}
+       defer kc.Close()
+       cr, err := NewContainerRunner(s.client, &ArvTestClient{callraw: true}, kc, s.docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       cr.checkContainerd = time.Duration(100 * time.Millisecond)
+       c.Assert(err, IsNil)
+       cr.ListProcesses = lp
+
+       s.docker.fn = func(t *TestDockerClient) {
+               time.Sleep(1 * time.Second)
+               t.logWriter.Close()
+       }
+
+       err = cr.CreateContainer()
+       c.Check(err, IsNil)
+
+       err = cr.StartContainer()
+       c.Check(err, IsNil)
+
+       err = cr.WaitFinish()
+       return err
+
+}
+
+func (s *TestSuite) TestCheckContainerdPresent(c *C) {
+       err := s.helpCheckContainerd(c, func() ([]PsProcess, error) {
+               return []PsProcess{FakeProcess{[]string{"docker-containerd"}}}, nil
+       })
+       c.Check(err, IsNil)
+}
+
+func (s *TestSuite) TestCheckContainerdMissing(c *C) {
+       err := s.helpCheckContainerd(c, func() ([]PsProcess, error) {
+               return []PsProcess{FakeProcess{[]string{"abc"}}}, nil
+       })
+       c.Check(err, ErrorMatches, `'containerd' not found in process list.`)
+}
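The containerd tests above are possible because ListProcesses is a swappable function field on ContainerRunner rather than a direct call into gopsutil. A small hedged sketch of that injection pattern (types and names below are made up for illustration, not the crunch-run API):

    package main

    import (
        "fmt"
        "strings"
    )

    // process is the minimal interface the runner needs from a process lister.
    type process interface {
        CmdlineSlice() ([]string, error)
    }

    // runner keeps the lister as a function field, so production code can read
    // the real process table while tests substitute a fake.
    type runner struct {
        listProcesses func() ([]process, error)
    }

    // containerdPresent reports whether any listed command line mentions
    // "containerd", mirroring the substring check in CheckContainerd.
    func (r *runner) containerdPresent() bool {
        procs, err := r.listProcesses()
        if err != nil {
            return false
        }
        for _, p := range procs {
            if cmd, err := p.CmdlineSlice(); err == nil && len(cmd) > 0 && strings.Contains(cmd[0], "containerd") {
                return true
            }
        }
        return false
    }

    type fakeProcess struct{ cmdline []string }

    func (f fakeProcess) CmdlineSlice() ([]string, error) { return f.cmdline, nil }

    func main() {
        r := &runner{listProcesses: func() ([]process, error) {
            return []process{fakeProcess{[]string{"docker-containerd"}}}, nil
        }}
        fmt.Println(r.containerdPresent()) // true
    }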
index 86f8cec04ae8037e37a1d4c9250216416f2f9bd6..13a171ae8416729cf67fd940a2170d871abc5bd1 100644 (file)
@@ -83,7 +83,7 @@ func (s *LoggingTestSuite) TestWriteLogsLarge(c *C) {
        cr.CrunchLog.Print("Goodbye")
        cr.CrunchLog.Close()
 
-       c.Check(api.Calls > 1, Equals, true)
+       c.Check(api.Calls > 0, Equals, true)
        c.Check(api.Calls < 2000000, Equals, true)
 
        mt, err := cr.LogCollection.MarshalManifest(".")
index 9ee99903c8d1e537d487a67d1c77d848fc93c807..59e8de3bc9f884dec899e22072c3afe684aceb1a 100644 (file)
@@ -99,7 +99,7 @@ func (c *cache) Update(client *arvados.Client, coll arvados.Collection, fs arvad
        }
        var updated arvados.Collection
        defer c.pdhs.Remove(coll.UUID)
-       err := client.RequestAndDecode(&updated, "PATCH", "/arvados/v1/collections/"+coll.UUID, client.UpdateBody(coll), nil)
+       err := client.RequestAndDecode(&updated, "PATCH", "arvados/v1/collections/"+coll.UUID, client.UpdateBody(coll), nil)
        if err == nil {
                c.collections.Add(client.AuthToken+"\000"+coll.PortableDataHash, &cachedCollection{
                        expire:     time.Now().Add(time.Duration(c.TTL)),
index 3814a459d53c46c8b92d7dc40d8fd8cd13ee6ae4..0e2f17c35b85df02b98df4d3e29a974d18deb17d 100644 (file)
@@ -74,7 +74,7 @@ func (s *IntegrationSuite) testCadaver(c *check.C, password string, pathFunc fun
        var newCollection arvados.Collection
        arv := arvados.NewClientFromEnv()
        arv.AuthToken = arvadostest.ActiveToken
-       err = arv.RequestAndDecode(&newCollection, "POST", "/arvados/v1/collections", bytes.NewBufferString(url.Values{"collection": {"{}"}}.Encode()), nil)
+       err = arv.RequestAndDecode(&newCollection, "POST", "arvados/v1/collections", bytes.NewBufferString(url.Values{"collection": {"{}"}}.Encode()), nil)
        c.Assert(err, check.IsNil)
 
        readPath, writePath, pdhPath := pathFunc(newCollection)
index 517ec1a2a26e96967ad50bec925a65b1f6149f6a..7d17be6e7cfe8c59305b452c8d788bca5748acdc 100644 (file)
@@ -183,6 +183,9 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
                remoteAddr = xff + "," + remoteAddr
        }
+       if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
+               r.URL.Scheme = xfp
+       }
 
        w := httpserver.WrapResponseWriter(wOrig)
        defer func() {
@@ -773,6 +776,7 @@ func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, loc
                u = newu
        }
        redir := (&url.URL{
+               Scheme:   r.URL.Scheme,
                Host:     r.Host,
                Path:     u.Path,
                RawQuery: redirQuery.Encode(),
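The keep-web change above stores X-Forwarded-Proto on the request URL so the absolute redirect built by seeOtherWithCookie keeps the original https scheme behind a TLS-terminating proxy. A minimal sketch of the idea using only the standard library (the helper name is illustrative):

    package main

    import (
        "fmt"
        "net/http"
        "net/url"
    )

    // redirectTarget builds an absolute redirect URL, preferring the scheme
    // reported by a reverse proxy via X-Forwarded-Proto over plain http.
    func redirectTarget(r *http.Request, path string) string {
        scheme := "http"
        if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
            scheme = xfp
        }
        return (&url.URL{Scheme: scheme, Host: r.Host, Path: path}).String()
    }

    func main() {
        r, _ := http.NewRequest("GET", "http://collections.example/c=abc/foo", nil)
        r.Header.Set("X-Forwarded-Proto", "https")
        fmt.Println(redirectTarget(r, "/t=token/foo")) // https://collections.example/t=token/foo
    }

This also explains the test change just below: the expected href now begins with an explicit "http://" rather than the scheme-relative "//".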
index f86f81bfa15e5a1c20fed2f68a796f029ae3a966..206bf6f4381fd98d4e7c4244e787c040de558aad 100644 (file)
@@ -513,7 +513,7 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
        if resp.Code != http.StatusSeeOther {
                return resp
        }
-       c.Check(resp.Body.String(), check.Matches, `.*href="//`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
+       c.Check(resp.Body.String(), check.Matches, `.*href="http://`+regexp.QuoteMeta(html.EscapeString(hostPath))+`(\?[^"]*)?".*`)
        cookies := (&http.Response{Header: resp.Header()}).Cookies()
 
        u, _ = u.Parse(resp.Header().Get("Location"))
index e87fa4afd0db660c16af8a7ec78e68027620c531..dc70d968e2992a16581694ac70bbf42ba92f93ba 100644 (file)
@@ -617,7 +617,7 @@ func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
        kc := runProxy(c, nil, false)
        defer closeListener()
 
-       // Point keepproxy to a non-existant keepstore
+       // Point keepproxy at a non-existent keepstore
        locals := map[string]string{
                TestProxyUUID: "http://localhost:12345",
        }
index b4fec5096d5a8e2767169fce3910f45136ddaee3..77c515d565e8113681c2dad610d103fae4156a15 100644 (file)
@@ -243,12 +243,15 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         return super(ComputeNodeShutdownActor, self)._finished()
 
     def cancel_shutdown(self, reason, **kwargs):
+        if not self.cancellable:
+            return False
         if self.cancel_reason is not None:
             # already cancelled
-            return
+            return False
         self.cancel_reason = reason
         self._logger.info("Shutdown cancelled: %s.", reason)
         self._finished(success_flag=False)
+        return True
 
     def _cancel_on_exception(orig_func):
         @functools.wraps(orig_func)
@@ -282,6 +285,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         self._logger.info("Starting shutdown")
         arv_node = self._arvados_node()
         if self._cloud.destroy_node(self.cloud_node):
+            self.cancellable = False
             self._logger.info("Shutdown success")
             if arv_node:
                 self._later.clean_arvados_node(arv_node)
@@ -335,7 +339,7 @@ class ComputeNodeMonitorActor(config.actor_class):
     def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer,
                  timer_actor, update_actor, cloud_client,
                  arvados_node=None, poll_stale_after=600, node_stale_after=3600,
-                 boot_fail_after=1800
+                 boot_fail_after=1800, consecutive_idle_count=0
     ):
         super(ComputeNodeMonitorActor, self).__init__()
         self._later = self.actor_ref.tell_proxy()
@@ -350,6 +354,8 @@ class ComputeNodeMonitorActor(config.actor_class):
         self.boot_fail_after = boot_fail_after
         self.subscribers = set()
         self.arvados_node = None
+        self.consecutive_idle_count = consecutive_idle_count
+        self.consecutive_idle = 0
         self._later.update_arvados_node(arvados_node)
         self.last_shutdown_opening = None
         self._later.consider_shutdown()
@@ -456,8 +462,14 @@ class ComputeNodeMonitorActor(config.actor_class):
         else:
             boot_grace = "boot exceeded"
 
-        # API server side not implemented yet.
-        idle_grace = 'idle exceeded'
+        if crunch_worker_state == "idle":
+            # Must report as "idle" at least "consecutive_idle_count" times
+            if self.consecutive_idle < self.consecutive_idle_count:
+                idle_grace = 'idle wait'
+            else:
+                idle_grace = 'idle exceeded'
+        else:
+            idle_grace = 'not idle'
 
         node_state = (crunch_worker_state, window, boot_grace, idle_grace)
         t = transitions[node_state]
@@ -517,4 +529,8 @@ class ComputeNodeMonitorActor(config.actor_class):
         if arvados_node is not None:
             self.arvados_node = arvados_node
             self._update.sync_node(self.cloud_node, self.arvados_node)
+            if self.arvados_node['crunch_worker_state'] == "idle":
+                self.consecutive_idle += 1
+            else:
+                self.consecutive_idle = 0
             self._later.consider_shutdown()
index 1cf8f4e41d776e5861c41816aff34cf2d98604db..5b7785afd93744b2565a6b467f56e9e2617bb25a 100644 (file)
@@ -5,7 +5,7 @@
 
 from __future__ import absolute_import, print_function
 
-import subprocess
+import subprocess32 as subprocess
 import time
 
 from . import ComputeNodeMonitorActor
index 9e38d13eb7f4788d8af485a7e5b4b6589c9f324c..48d19f592bbdb0b87d905bac377c849000b59ef1 100644 (file)
@@ -35,8 +35,10 @@ class BaseComputeNodeDriver(RetryMixin):
         return driver_class(**auth_kwargs)
 
     @RetryMixin._retry()
-    def _set_sizes(self):
-        self.sizes = {sz.id: sz for sz in self.real.list_sizes()}
+    def sizes(self):
+        if self._sizes is None:
+            self._sizes = {sz.id: sz for sz in self.real.list_sizes()}
+        return self._sizes
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
                  driver_class, retry_wait=1, max_retry_wait=180):
@@ -73,7 +75,7 @@ class BaseComputeNodeDriver(RetryMixin):
                 if new_pair is not None:
                     self.create_kwargs[new_pair[0]] = new_pair[1]
 
-        self._set_sizes()
+        self._sizes = None
 
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
index ae554327ca20d929a92b595da54e32ba05e6485f..719124d4000f724a271077d9f1614c50c6788f8d 100644 (file)
@@ -89,7 +89,7 @@ echo %s > /var/tmp/arv-node-data/meta-data/instance-type
         for n in nodes:
             # Need to populate Node.size
             if not n.size:
-                n.size = self.sizes[n.extra["properties"]["hardwareProfile"]["vmSize"]]
+                n.size = self.sizes()[n.extra["properties"]["hardwareProfile"]["vmSize"]]
             n.extra['arvados_node_size'] = n.extra.get('tags', {}).get('arvados_node_size')
         return nodes
 
index 2829b9c0b1bead892aa83d10cf01f6aaa4f7e9a3..14845ac12fe31414e84749190593556516b6b224 100644 (file)
@@ -41,7 +41,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         nodelist = super(ComputeNodeDriver, self).list_nodes()
         for node in nodelist:
             self._ensure_private_ip(node)
-            node.size = self.sizes["1"]
+            node.size = self.sizes()["1"]
         return nodelist
 
     def create_node(self, size, arvados_node):
index 2b1564279717d0d0159bd3c07b307b4b5675c98f..56812d258a92212b02a53d9775534d8b23b50b69 100644 (file)
@@ -110,7 +110,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         nodes = super(ComputeNodeDriver, self).list_nodes()
         for n in nodes:
             if not n.size:
-                n.size = self.sizes[n.extra["instance_type"]]
+                n.size = self.sizes()[n.extra["instance_type"]]
             n.extra['arvados_node_size'] = n.extra.get('tags', {}).get('arvados_node_size')
         return nodes
 
index be39ecba6bf4b3cfb4ef6e0e5dd7c1168dc86ddd..11025f7840bc00fe6c188ad6b0f9e9bea1795cba 100644 (file)
@@ -38,7 +38,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
-        self._sizes_by_id = {sz.id: sz for sz in self.sizes.itervalues()}
         self._disktype_links = {dt.name: self._object_link(dt)
                                 for dt in self.real.ex_list_disktypes()}
 
@@ -120,7 +119,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
             # It's supposed to be the actual size object.  Check that it's not,
             # and monkeypatch the results when that's the case.
             if not hasattr(node.size, 'id'):
-                node.size = self._sizes_by_id[node.size]
+                node.size = self.sizes()[node.size]
             # Get arvados-assigned cloud size id
             node.extra['arvados_node_size'] = node.extra.get('metadata', {}).get('arvados_node_size')
         return nodelist
index 8c6757e51c451c253e5f16f57570d85bb52f3d7a..4fda7e76d69ed87aa1b5b032f1e3b1b8e608f5b5 100644 (file)
@@ -57,7 +57,8 @@ class NodeManagerConfig(ConfigParser.SafeConfigParser):
                        'boot_fail_after': str(sys.maxint),
                        'node_stale_after': str(60 * 60 * 2),
                        'watchdog': '600',
-                       'node_mem_scaling': '0.95'},
+                       'node_mem_scaling': '0.95',
+                       'consecutive_idle_count': '2'},
             'Manage': {'address': '127.0.0.1',
                        'port': '-1',
                        'ManagementToken': ''},
index 911798e08f937ded2d10e30b8b8fe7d64edd8f6b..1edf4dc4792e5b7a9f638a17599c66c76410ab84 100644 (file)
@@ -112,7 +112,8 @@ class NodeManagerDaemonActor(actor_class):
                  node_setup_class=dispatch.ComputeNodeSetupActor,
                  node_shutdown_class=dispatch.ComputeNodeShutdownActor,
                  node_actor_class=dispatch.ComputeNodeMonitorActor,
-                 max_total_price=0):
+                 max_total_price=0,
+                 consecutive_idle_count=1):
         super(NodeManagerDaemonActor, self).__init__()
         self._node_setup = node_setup_class
         self._node_shutdown = node_shutdown_class
@@ -133,6 +134,7 @@ class NodeManagerDaemonActor(actor_class):
         self.poll_stale_after = poll_stale_after
         self.boot_fail_after = boot_fail_after
         self.node_stale_after = node_stale_after
+        self.consecutive_idle_count = consecutive_idle_count
         self.last_polls = {}
         for poll_name in ['server_wishlist', 'arvados_nodes', 'cloud_nodes']:
             poll_actor = locals()[poll_name + '_actor']
@@ -173,7 +175,8 @@ class NodeManagerDaemonActor(actor_class):
             poll_stale_after=self.poll_stale_after,
             node_stale_after=self.node_stale_after,
             cloud_client=self._cloud_driver,
-            boot_fail_after=self.boot_fail_after)
+            boot_fail_after=self.boot_fail_after,
+            consecutive_idle_count=self.consecutive_idle_count)
         actorTell = actor.tell_proxy()
         actorTell.subscribe(self._later.node_can_shutdown)
         self._cloud_nodes_actor.subscribe_to(cloud_node.id,
@@ -390,22 +393,25 @@ class NodeManagerDaemonActor(actor_class):
         nodes_wanted = self._nodes_wanted(cloud_size)
         if nodes_wanted < 1:
             return None
-        arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
-        self._logger.info("Want %i more %s nodes.  Booting a node.",
-                          nodes_wanted, cloud_size.id)
-        new_setup = self._node_setup.start(
-            timer_actor=self._timer,
-            arvados_client=self._new_arvados(),
-            arvados_node=arvados_node,
-            cloud_client=self._new_cloud(),
-            cloud_size=self.server_calculator.find_size(cloud_size.id)).proxy()
-        self.booting[new_setup.actor_ref.actor_urn] = new_setup
-        self.sizes_booting[new_setup.actor_ref.actor_urn] = cloud_size
-
-        if arvados_node is not None:
-            self.arvados_nodes[arvados_node['uuid']].assignment_time = (
-                time.time())
-        new_setup.subscribe(self._later.node_setup_finished)
+
+        if not self.cancel_node_shutdown(cloud_size):
+            arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
+            self._logger.info("Want %i more %s nodes.  Booting a node.",
+                              nodes_wanted, cloud_size.id)
+            new_setup = self._node_setup.start(
+                timer_actor=self._timer,
+                arvados_client=self._new_arvados(),
+                arvados_node=arvados_node,
+                cloud_client=self._new_cloud(),
+                cloud_size=self.server_calculator.find_size(cloud_size.id))
+            self.booting[new_setup.actor_urn] = new_setup.proxy()
+            self.sizes_booting[new_setup.actor_urn] = cloud_size
+
+            if arvados_node is not None:
+                self.arvados_nodes[arvados_node['uuid']].assignment_time = (
+                    time.time())
+            new_setup.tell_proxy().subscribe(self._later.node_setup_finished)
+
         if nodes_wanted > 1:
             self._later.start_node(cloud_size)
 
@@ -456,13 +462,28 @@ class NodeManagerDaemonActor(actor_class):
         if (nodes_excess < 1) or not self.booting:
             return None
         for key, node in self.booting.iteritems():
-            if node and node.cloud_size.get().id == size.id and node.stop_if_no_cloud_node().get():
-                del self.booting[key]
-                del self.sizes_booting[key]
+            try:
+                if node and node.cloud_size.get().id == size.id and node.stop_if_no_cloud_node().get(2):
+                    del self.booting[key]
+                    del self.sizes_booting[key]
+                    if nodes_excess > 1:
+                        self._later.stop_booting_node(size)
+                    return
+            except pykka.Timeout:
+                pass
 
-                if nodes_excess > 1:
-                    self._later.stop_booting_node(size)
-                break
+    @_check_poll_freshness
+    def cancel_node_shutdown(self, size):
+        # Go through shutdown actors and see if there are any of the appropriate size that can be cancelled
+        for record in self.cloud_nodes.nodes.itervalues():
+            try:
+                if (record.shutdown_actor is not None and
+                    record.cloud_node.size.id == size.id and
+                    record.shutdown_actor.cancel_shutdown("Node size is in wishlist").get(2)):
+                        return True
+            except (pykka.ActorDeadError, pykka.Timeout) as e:
+                pass
+        return False
 
     def _begin_node_shutdown(self, node_actor, cancellable):
         cloud_node_obj = node_actor.cloud_node.get()
index e91764474fbd10edb28463368588329125e0a3db..1020b4a80ced597911b886c40789dea39f1d5598 100644 (file)
@@ -7,7 +7,7 @@ from __future__ import absolute_import, print_function
 
 import logging
 import re
-import subprocess
+import subprocess32 as subprocess
 
 import arvados.util
 
index f65e0806ec56df96f81c5bed87f657b15355fdec..1439c94118a913179f09a928df38caaab4c24e04 100644 (file)
@@ -144,7 +144,8 @@ def main(args=None):
             config.getint('Daemon', 'boot_fail_after'),
             config.getint('Daemon', 'node_stale_after'),
             node_setup, node_shutdown, node_monitor,
-            max_total_price=config.getfloat('Daemon', 'max_total_price')).tell_proxy()
+            max_total_price=config.getfloat('Daemon', 'max_total_price'),
+            consecutive_idle_count=config.getint('Daemon', 'consecutive_idle_count'),).tell_proxy()
 
         watchdog = WatchdogActor.start(config.getint('Daemon', 'watchdog'),
                             cloud_node_poller.actor_ref,
index 66af7c32d128ab3a51815a74443b885779052f6b..0abb3b3a379cbbbec7e619fdcca081ec98a340ea 100644 (file)
@@ -5,7 +5,7 @@
 
 from __future__ import absolute_import, print_function
 
-import subprocess
+import subprocess32 as subprocess
 
 from . import clientactor
 from . import config
index efd2445175589f761165aa7ff5746be7ab4b6f44..8ba68018d5840466698cf8a0cf19546887bf143b 100644 (file)
@@ -65,6 +65,15 @@ boot_fail_after = 1800
 # an Arvados node that hasn't been updated for this long.
 node_stale_after = 14400
 
+# Number of consecutive times a node must report as "idle" before it
+# will be considered eligible for shutdown.  Node status is checked
+# each poll period, and a node can go idle at any point during a poll
+# period (meaning a node that has only been idle for 1 second could
+# be reported as idle).  With a 60 second poll period, three
+# consecutive status updates of "idle" suggest the node has been idle
+# for at least 121 seconds.
+consecutive_idle_count = 3
+
 # Scaling factor to be applied to nodes' available RAM size. Usually there's a
 # variable discrepancy between the advertised RAM value on cloud nodes and the
 # actual amount available.
@@ -74,6 +83,7 @@ node_mem_scaling = 0.95
 # File path for Certificate Authorities
 certs_file = /etc/ssl/certs/ca-certificates.crt
 
+
 [Logging]
 # Log file path
 file = /var/log/arvados/node-manager.log
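The consecutive_idle_count comment above implies simple arithmetic: N consecutive "idle" reports guarantee (N - 1) full poll periods between the first and last report, plus whatever idle time the node already had at the first report. A rough sketch of that calculation, assuming (as the comment does) at least one second of idleness before the first report (helper name invented for illustration):

    package main

    import "fmt"

    // minIdleSeconds estimates the minimum idle time implied by N consecutive
    // "idle" status reports taken at a fixed poll period, assuming the node
    // had been idle for at least one second when the first report was taken.
    func minIdleSeconds(consecutiveIdleCount, pollPeriodSeconds int) int {
        if consecutiveIdleCount < 1 {
            return 0
        }
        return (consecutiveIdleCount-1)*pollPeriodSeconds + 1
    }

    func main() {
        fmt.Println(minIdleSeconds(3, 60)) // 121, matching the example configs above and below
    }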
index 117f9b224bff2f4ca567b64e09de81b4ed34c692..f5329ebe16213ad1d7fa37aff09212efce299603 100644 (file)
@@ -65,6 +65,15 @@ boot_fail_after = 1800
 # an Arvados node that hasn't been updated for this long.
 node_stale_after = 14400
 
+# Number of consecutive times a node must report as "idle" before it
+# will be considered eligible for shutdown.  Node status is checked
+# each poll period, and a node can go idle at any point during a poll
+# period (meaning a node that has only been idle for 1 second could
+# be reported as idle).  With a 60 second poll period, three
+# consecutive status updates of "idle" suggest the node has been idle
+# for at least 121 seconds.
+consecutive_idle_count = 3
+
 # Scaling factor to be applied to nodes' available RAM size. Usually there's a
 # variable discrepancy between the advertised RAM value on cloud nodes and the
 # actual amount available.
index 8a244a444487052cd1543d9135448703741ca3e0..acd3fd1e3e6ab6a36720670f439cc2061f2c574f 100644 (file)
@@ -54,6 +54,15 @@ poll_stale_after = 600
 # an Arvados node that hasn't been updated for this long.
 node_stale_after = 14400
 
+# Number of consecutive times a node must report as "idle" before it
+# will be considered eligible for shutdown.  Node status is checked
+# each poll period, and a node can go idle at any point during a poll
+# period (meaning a node that has only been idle for 1 second could
+# be reported as idle).  With a 60 second poll period, three
+# consecutive status updates of "idle" suggest the node has been idle
+# for at least 121 seconds.
+consecutive_idle_count = 3
+
 # Scaling factor to be applied to nodes' available RAM size. Usually there's a
 # variable discrepancy between the advertised RAM value on cloud nodes and the
 # actual amount available.
index d94ceb2fa40a3e7689b76341573aa44155bb003a..1e41f3dad2fd32cfa3f42c461f2b21362796cb8e 100644 (file)
@@ -40,7 +40,8 @@ setup(name='arvados-node-manager',
           'future',
           'pykka',
           'python-daemon',
-          'setuptools'
+          'setuptools',
+          'subprocess32>=3.5.1',
       ],
       dependency_links=[
           "https://github.com/curoverse/libcloud/archive/apache-libcloud-2.3.1.dev1.zip"
@@ -51,6 +52,7 @@ setup(name='arvados-node-manager',
           'pbr<1.7.0',
           'mock>=1.0',
           'apache-libcloud>=2.3.1.dev1',
+          'subprocess32>=3.5.1',
       ],
       zip_safe=False
       )
index a8429e1369b62c2bf456a4225e45a3e38a6343b2..1ba2957ee5544c9346bbb00bc3f0e2ad9e51276a 100755 (executable)
@@ -12,7 +12,7 @@ events or behaviors for each test.
 
 """
 
-import subprocess
+import subprocess32 as subprocess
 import os
 import sys
 import re
@@ -115,7 +115,10 @@ def node_shutdown(g):
     global compute_nodes
     if g.group(1) in compute_nodes:
         del compute_nodes[g.group(1)]
-    return 0
+        return 0
+    else:
+        return 1
+
 
 def jobs_req(g):
     global all_jobs
index 778c9aeaf5ffdbbcecaf90ac8072ace7210ce4a5..aee3cbdac8928cb8237357b9250d595bba349ba9 100644 (file)
@@ -424,7 +424,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor()
         self.shutdowns._set_state(True, 600)
         self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                          (False, "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')"))
+                          (False, "node state is ('unpaired', 'open', 'boot wait', 'not idle')"))
 
     def test_shutdown_when_invalid_cloud_node_size(self):
         self.make_mocks(1)
@@ -438,7 +438,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
     def test_shutdown_without_arvados_node(self):
         self.make_actor(start_time=0)
         self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('down', 'open', 'boot exceeded', 'idle exceeded')"),
+        self.assertEquals((True, "node state is ('down', 'open', 'boot exceeded', 'not idle')"),
                           self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_shutdown_missing(self):
@@ -447,7 +447,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
                                               last_ping_at='1970-01-01T01:02:03.04050607Z')
         self.make_actor(10, arv_node)
         self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"),
+        self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'not idle')"),
                           self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_shutdown_running_broken(self):
@@ -456,7 +456,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor(12, arv_node)
         self.shutdowns._set_state(True, 600)
         self.cloud_client.broken.return_value = True
-        self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"),
+        self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'not idle')"),
                           self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_shutdown_missing_broken(self):
@@ -466,7 +466,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor(11, arv_node)
         self.shutdowns._set_state(True, 600)
         self.cloud_client.broken.return_value = True
-        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'not idle')"))
 
     def test_no_shutdown_when_window_closed(self):
         self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
@@ -476,7 +476,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
     def test_no_shutdown_when_node_running_job(self):
         self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True))
         self.shutdowns._set_state(True, 600)
-        self.assertEquals((False, "node state is ('busy', 'open', 'boot wait', 'idle exceeded')"),
+        self.assertEquals((False, "node state is ('busy', 'open', 'boot wait', 'not idle')"),
                           self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_shutdown_when_node_state_unknown(self):
@@ -490,7 +490,7 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor(5, testutil.arvados_node_mock(
             5, crunch_worker_state='fail'))
         self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('fail', 'open', 'boot wait', 'idle exceeded')"),
+        self.assertEquals((True, "node state is ('fail', 'open', 'boot wait', 'not idle')"),
                           self.node_actor.shutdown_eligible().get(self.TIMEOUT))
 
     def test_no_shutdown_when_node_state_stale(self):
index 840d0a582ab76681893600403bfb9c1ac6626215..02d8fb62e0b8b624131974c49e4869dda0c06299 100644 (file)
@@ -5,7 +5,7 @@
 
 from __future__ import absolute_import, print_function
 
-import subprocess
+import subprocess32 as subprocess
 import time
 import unittest
 
@@ -18,7 +18,7 @@ from .test_computenode_dispatch import \
     ComputeNodeSetupActorTestCase, \
     ComputeNodeUpdateActorTestCase
 
-@mock.patch('subprocess.check_output')
+@mock.patch('subprocess32.check_output')
 class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                             unittest.TestCase):
     ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor
@@ -117,7 +117,7 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         super(SLURMComputeNodeShutdownActorTestCase,
               self).test_uncancellable_shutdown()
 
-@mock.patch('subprocess.check_output')
+@mock.patch('subprocess32.check_output')
 class SLURMComputeNodeUpdateActorTestCase(ComputeNodeUpdateActorTestCase):
     ACTOR_CLASS = slurm_dispatch.ComputeNodeUpdateActor
 
@@ -131,7 +131,7 @@ class SLURMComputeNodeUpdateActorTestCase(ComputeNodeUpdateActorTestCase):
 class SLURMComputeNodeSetupActorTestCase(ComputeNodeSetupActorTestCase):
     ACTOR_CLASS = slurm_dispatch.ComputeNodeSetupActor
 
-    @mock.patch('subprocess.check_output')
+    @mock.patch('subprocess32.check_output')
     def test_update_node_features(self, check_output):
         # `scontrol update` happens only if the Arvados node record
         # has a hostname. ComputeNodeSetupActorTestCase.make_mocks
@@ -142,14 +142,14 @@ class SLURMComputeNodeSetupActorTestCase(ComputeNodeSetupActorTestCase):
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
         check_output.assert_called_with(['scontrol', 'update', 'NodeName=compute99', 'Weight=1000', 'Features=instancetype=z1.test'])
 
-    @mock.patch('subprocess.check_output')
+    @mock.patch('subprocess32.check_output')
     def test_failed_arvados_calls_retried(self, check_output):
         super(SLURMComputeNodeSetupActorTestCase, self).test_failed_arvados_calls_retried()
 
-    @mock.patch('subprocess.check_output')
+    @mock.patch('subprocess32.check_output')
     def test_subscribe(self, check_output):
         super(SLURMComputeNodeSetupActorTestCase, self).test_subscribe()
 
-    @mock.patch('subprocess.check_output')
+    @mock.patch('subprocess32.check_output')
     def test_creation_with_arvados_node(self, check_output):
         super(SLURMComputeNodeSetupActorTestCase, self).test_creation_with_arvados_node()
index d09cbf72359610ac08afa428e39f024d3086835c..1b6e4ca8da4aa24bfb45f8382e7b5d7700cd2bf2 100644 (file)
@@ -620,10 +620,26 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         monitor = self.monitor_list()[0].proxy()
         self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
         self.assertTrue(self.node_shutdown.start.called)
+        getmock = mock.MagicMock()
+        getmock.get.return_value = False
+        self.last_shutdown.cancel_shutdown.return_value = getmock
         self.daemon.update_server_wishlist(
             [testutil.MockSize(6)]).get(self.TIMEOUT)
         self.busywait(lambda: self.node_setup.start.called)
 
+    def test_nodes_shutting_down_cancelled(self):
+        size = testutil.MockSize(6)
+        cloud_node = testutil.cloud_node_mock(6, size=size)
+        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6, crunch_worker_state='down')],
+                         avail_sizes=[(size, {"cores":1})])
+        self.assertEqual(1, self.alive_monitor_count())
+        monitor = self.monitor_list()[0].proxy()
+        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
+        self.assertTrue(self.node_shutdown.start.called)
+        self.daemon.update_server_wishlist(
+            [testutil.MockSize(6)]).get(self.TIMEOUT)
+        self.busywait(lambda: self.last_shutdown.cancel_shutdown.called)
+
     def test_nodes_shutting_down_not_replaced_at_max_nodes(self):
         cloud_node = testutil.cloud_node_mock(7)
         self.make_daemon([cloud_node], [testutil.arvados_node_mock(7)],
index 8c10f1b426e4bf71b036e17208f4056c27323327..de83b68fed81b5daa313cda732477be311302ee2 100644 (file)
@@ -154,8 +154,8 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         super(JobQueueMonitorActorTestCase, self).build_monitor(*args, **kwargs)
         self.client.jobs().queue().execute.side_effect = side_effect
 
-    @mock.patch("subprocess.check_call")
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_call")
+    @mock.patch("subprocess32.check_output")
     def test_unsatisfiable_jobs(self, mock_squeue, mock_scancel):
         job_uuid = 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
         container_uuid = 'yyyyy-dz642-yyyyyyyyyyyyyyy'
@@ -169,7 +169,7 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         self.client.jobs().cancel.assert_called_with(uuid=job_uuid)
         mock_scancel.assert_called_with(['scancel', '--name='+container_uuid])
 
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_output")
     def test_subscribers_get_server_lists(self, mock_squeue):
         mock_squeue.return_value = ""
 
@@ -179,7 +179,7 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         self.subscriber.assert_called_with([testutil.MockSize(1),
                                             testutil.MockSize(2)])
 
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_output")
     def test_squeue_server_list(self, mock_squeue):
         mock_squeue.return_value = """1|1024|0|(Resources)|zzzzz-dz642-zzzzzzzzzzzzzzy|(null)|1234567890
 2|1024|0|(Resources)|zzzzz-dz642-zzzzzzzzzzzzzzz|(null)|1234567890
@@ -193,7 +193,7 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         self.subscriber.assert_called_with([testutil.MockSize(1),
                                             testutil.MockSize(2)])
 
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_output")
     def test_squeue_server_list_suffix(self, mock_squeue):
         mock_squeue.return_value = """1|1024M|0|(ReqNodeNotAvail, UnavailableNodes:compute123)|zzzzz-dz642-zzzzzzzzzzzzzzy|(null)|1234567890
 1|2G|0|(ReqNodeNotAvail)|zzzzz-dz642-zzzzzzzzzzzzzzz|(null)|1234567890
@@ -207,7 +207,7 @@ class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         self.subscriber.assert_called_with([testutil.MockSize(1),
                                             testutil.MockSize(2)])
 
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_output")
     def test_squeue_server_list_instancetype_constraint(self, mock_squeue):
         mock_squeue.return_value = """1|1024|0|(Resources)|zzzzz-dz642-zzzzzzzzzzzzzzy|instancetype=z2.test|1234567890\n"""
         super(JobQueueMonitorActorTestCase, self).build_monitor(jobqueue.ServerCalculator(
index b087325c6f702347d68bd68983793b9ab3536787..df31a12267c6ab3447272ea66414af5f408fba2b 100644 (file)
@@ -21,7 +21,7 @@ class ArvadosNodeListMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
             *args, **kwargs)
         self.client.nodes().list().execute.side_effect = side_effect
 
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_output")
     def test_uuid_is_subscription_key(self, sinfo_mock):
         sinfo_mock.return_value = ""
         node = testutil.arvados_node_mock()
@@ -40,7 +40,7 @@ class ArvadosNodeListMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
         self.subscriber.assert_called_with(node)
         self.assertEqual("down", node["crunch_worker_state"])
 
-    @mock.patch("subprocess.check_output")
+    @mock.patch("subprocess32.check_output")
     def test_update_from_sinfo(self, sinfo_mock):
         sinfo_mock.return_value = """compute1|idle|instancetype=a1.test
 compute2|alloc|(null)
index a4f750b4c4d0445567ad20da7ac9408eb12a692d..f18d4e464cdf34ed86c0be1a4631aadc598179df 100644 (file)
                        "revision": "d682213848ed68c0a260ca37d6dd5ace8423f5ba",
                        "revisionTime": "2017-12-05T20:32:29Z"
                },
+               {
+                       "checksumSHA1": "st4vb0GmDeoKbsfxdpNZ2MPl76M=",
+                       "path": "github.com/StackExchange/wmi",
+                       "revision": "cdffdb33acae0e14efff2628f9bae377b597840e",
+                       "revisionTime": "2018-04-12T20:51:11Z"
+               },
                {
                        "checksumSHA1": "spyv5/YFBjYyZLZa1U2LBfDR8PM=",
                        "path": "github.com/beorn7/perks/quantile",
                        "revision": "0ca9ea5df5451ffdf184b4428c902747c2c11cd7",
                        "revisionTime": "2017-03-27T23:54:44Z"
                },
+               {
+                       "checksumSHA1": "Kqv7bA4oJG0nPwQvGWDwGGaKONo=",
+                       "path": "github.com/go-ole/go-ole",
+                       "revision": "7a0fa49edf48165190530c675167e2f319a05268",
+                       "revisionTime": "2018-06-25T08:58:08Z"
+               },
+               {
+                       "checksumSHA1": "PArleDBtadu2qO4hJwHR8a3IOTA=",
+                       "path": "github.com/go-ole/go-ole/oleutil",
+                       "revision": "7a0fa49edf48165190530c675167e2f319a05268",
+                       "revisionTime": "2018-06-25T08:58:08Z"
+               },
                {
                        "checksumSHA1": "wn2shNJMwRZpvuvkf1s7h0wvqHI=",
                        "path": "github.com/gogo/protobuf/proto",
                        "revision": "1744e2970ca51c86172c8190fadad617561ed6e7",
                        "revisionTime": "2017-11-10T11:01:46Z"
                },
+               {
+                       "checksumSHA1": "q14d3C3xvWevU3dSv4P5K0+OSD0=",
+                       "path": "github.com/shirou/gopsutil/cpu",
+                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+                       "revisionTime": "2018-07-05T13:28:12Z"
+               },
+               {
+                       "checksumSHA1": "LZ9GloiGLTISmQ4dalK2XspH6Wo=",
+                       "path": "github.com/shirou/gopsutil/host",
+                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+                       "revisionTime": "2018-07-05T13:28:12Z"
+               },
+               {
+                       "checksumSHA1": "cyoqI0gryzjxGTkaAfyUqMiuUR0=",
+                       "path": "github.com/shirou/gopsutil/internal/common",
+                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+                       "revisionTime": "2018-07-05T13:28:12Z"
+               },
+               {
+                       "checksumSHA1": "vEQLjAO5T5K9zXblEMYdoaBZzj0=",
+                       "path": "github.com/shirou/gopsutil/mem",
+                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+                       "revisionTime": "2018-07-05T13:28:12Z"
+               },
+               {
+                       "checksumSHA1": "KMWFRa0DVpabo9d8euB4RYjUBQE=",
+                       "path": "github.com/shirou/gopsutil/net",
+                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+                       "revisionTime": "2018-07-05T13:28:12Z"
+               },
+               {
+                       "checksumSHA1": "fbO7c1gv1kSvWKOb/+5HUWFkBaA=",
+                       "path": "github.com/shirou/gopsutil/process",
+                       "revision": "63728fcf6b24475ecfea044e22242447666c2f52",
+                       "revisionTime": "2018-07-05T13:28:12Z"
+               },
+               {
+                       "checksumSHA1": "Nve7SpDmjsv6+rhkXAkfg/UQx94=",
+                       "path": "github.com/shirou/w32",
+                       "revision": "bb4de0191aa41b5507caa14b0650cdbddcd9280b",
+                       "revisionTime": "2016-09-30T03:27:40Z"
+               },
                {
                        "checksumSHA1": "8QeSG127zQqbA+YfkO1WkKx/iUI=",
                        "path": "github.com/src-d/gcfg",