Merge branch '15964-fix-docs' refs #15964
author Peter Amstutz <peter.amstutz@curii.com>
Tue, 25 Aug 2020 18:45:29 +0000 (14:45 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Tue, 25 Aug 2020 18:45:29 +0000 (14:45 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

246 files changed:
apps/workbench/Gemfile
apps/workbench/Gemfile.lock
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/models/application_record.rb [deleted file]
apps/workbench/app/models/arvados_base.rb
apps/workbench/bin/bundle
apps/workbench/bin/setup
apps/workbench/bin/update
apps/workbench/bin/yarn [new file with mode: 0755]
apps/workbench/config/application.default.yml
apps/workbench/config/application.rb
apps/workbench/config/boot.rb
apps/workbench/config/initializers/content_security_policy.rb [new file with mode: 0644]
apps/workbench/config/initializers/new_framework_defaults.rb
apps/workbench/config/initializers/new_framework_defaults_5_1.rb [new file with mode: 0644]
apps/workbench/config/initializers/new_framework_defaults_5_2.rb [new file with mode: 0644]
apps/workbench/config/routes.rb
apps/workbench/config/secrets.yml
build/check-copyright-notices
build/package-build-dockerfiles/centos7/Dockerfile
build/package-build-dockerfiles/debian10/Dockerfile
build/package-build-dockerfiles/debian9/Dockerfile
build/package-build-dockerfiles/ubuntu1604/Dockerfile
build/package-build-dockerfiles/ubuntu1804/Dockerfile
build/package-testing/test-package-arvados-docker-cleaner.sh [new file with mode: 0755]
build/package-testing/test-package-arvados-node-manager.sh [deleted file]
build/package-testing/test-package-python-arvados-cwl-runner.sh [deleted symlink]
build/package-testing/test-package-python-arvados-fuse.sh [deleted symlink]
build/package-testing/test-package-python-arvados-python-client.sh [deleted symlink]
build/package-testing/test-package-python-cwltest.sh [deleted symlink]
build/package-testing/test-package-python27-python-arvados-python-client.sh [deleted file]
build/package-testing/test-package-python3-arvados-cwl-runner.sh
build/package-testing/test-package-python3-arvados-python-client.sh
build/package-testing/test-package-python3-crunchstat-summary.sh [moved from services/nodemanager/arvnodeman/test/__init__.py with 66% similarity, mode: 0755]
build/package-testing/test-package-python3-cwltest.sh [moved from build/package-testing/test-package-python27-python-arvados-cwl-runner.sh with 79% similarity]
build/package-testing/test-package-python3-python-arvados-fuse.sh [changed from symlink to file mode: 0755]
build/package-testing/test-package-rh-python36-python-arvados-cwl-runner.sh [new file with mode: 0755]
build/package-testing/test-package-rh-python36-python-arvados-fuse.sh [moved from build/package-testing/test-package-python27-python-arvados-fuse.sh with 100% similarity]
build/package-testing/test-package-rh-python36-python-crunchstat-summary.sh [new file with mode: 0755]
build/package-testing/test-package-rh-python36-python-cwltest.sh [moved from build/package-testing/test-package-python27-python-cwltest.sh with 74% similarity]
build/rails-package-scripts/README.md
build/rails-package-scripts/arvados-api-server.sh
build/rails-package-scripts/prerm.sh
build/run-build-docker-jobs-image.sh
build/run-build-packages-one-target.sh
build/run-build-packages-python-and-ruby.sh
build/run-build-packages.sh
build/run-library.sh
build/run-tests.sh
doc/_config.yml
doc/admin/management-token.html.textile.liquid
doc/admin/metrics.html.textile.liquid
doc/admin/scoped-tokens.html.textile.liquid
doc/admin/spot-instances.html.textile.liquid
doc/admin/token-expiration-policy.html.textile.liquid [new file with mode: 0644]
doc/admin/upgrading.html.textile.liquid
doc/admin/user-management-cli.html.textile.liquid
doc/architecture/index.html.textile.liquid
doc/install/crunch2-cloud/install-compute-node.html.textile.liquid
doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid
doc/install/install-manual-prerequisites.html.textile.liquid
doc/install/install-nodemanager.html.textile.liquid [deleted file]
doc/sdk/python/cookbook.html.textile.liquid
go.mod
go.sum
lib/boot/supervisor.go
lib/cloud/azure/azure.go
lib/cloud/azure/azure_test.go
lib/config/config.default.yml
lib/config/deprecated.go
lib/config/export.go
lib/config/generated_config.go
lib/controller/federation/federation_test.go
lib/controller/handler.go
lib/controller/localdb/login.go
lib/controller/localdb/login_ldap_test.go
lib/controller/localdb/login_testuser.go [new file with mode: 0644]
lib/controller/localdb/login_testuser_test.go [new file with mode: 0644]
lib/dispatchcloud/dispatcher_test.go
lib/dispatchcloud/scheduler/run_queue.go
lib/dispatchcloud/scheduler/run_queue_test.go
lib/dispatchcloud/scheduler/sync.go
lib/dispatchcloud/test/queue.go
lib/dispatchcloud/test/stub_driver.go
sdk/cli/arvados-cli.gemspec
sdk/go/arvados/config.go
sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_collection_test.go
sdk/go/arvados/fs_deferred.go
sdk/go/arvados/fs_lookup.go
sdk/go/arvados/fs_project.go
sdk/go/arvados/fs_project_test.go
sdk/go/arvados/fs_site.go
sdk/go/arvados/fs_site_test.go
sdk/go/health/aggregator_test.go
sdk/go/keepclient/keepclient_test.go
sdk/pam/.dockerignore [deleted file]
sdk/pam/.gitignore [deleted symlink]
sdk/pam/Dockerfile [deleted file]
sdk/pam/LICENSE-2.0.txt [deleted file]
sdk/pam/MANIFEST.in [deleted file]
sdk/pam/README.rst [deleted file]
sdk/pam/arvados_pam/__init__.py [deleted file]
sdk/pam/arvados_pam/auth_event.py [deleted file]
sdk/pam/arvados_version.py [deleted file]
sdk/pam/examples/shellinabox [deleted file]
sdk/pam/fpm-info.sh [deleted file]
sdk/pam/gittaggers.py [deleted symlink]
sdk/pam/integration_tests/__init__.py [deleted file]
sdk/pam/integration_tests/test_pam.py [deleted file]
sdk/pam/lib/libpam_arvados.py [deleted file]
sdk/pam/pam-configs/arvados [deleted file]
sdk/pam/setup.py [deleted file]
sdk/pam/tests/__init__.py [deleted file]
sdk/pam/tests/integration_test.pl [deleted file]
sdk/pam/tests/mocker.py [deleted file]
sdk/pam/tests/test_auth_event.py [deleted file]
sdk/pam/tests/test_pam_sm.py [deleted file]
services/api/.gitignore
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/application_controller.rb
services/api/app/controllers/user_sessions_controller.rb
services/api/app/models/api_client.rb
services/api/app/models/api_client_authorization.rb
services/api/app/models/arvados_model.rb
services/api/app/models/collection.rb
services/api/app/models/container.rb
services/api/app/models/container_request.rb
services/api/app/models/group.rb
services/api/app/models/node.rb
services/api/app/models/user.rb
services/api/bin/bundle
services/api/bin/setup
services/api/bin/update
services/api/bin/yarn [new file with mode: 0755]
services/api/config/application.rb
services/api/config/arvados_config.rb
services/api/config/boot.rb
services/api/config/environments/development.rb.example
services/api/config/environments/production.rb.example
services/api/config/environments/test.rb.example
services/api/config/initializers/content_security_policy.rb [new file with mode: 0644]
services/api/config/initializers/legacy_jobs_api.rb
services/api/config/initializers/new_framework_defaults_5_2.rb [new file with mode: 0644]
services/api/config/initializers/preload_all_models.rb [deleted file]
services/api/config/initializers/time_zone.rb
services/api/config/initializers/wrap_parameters.rb
services/api/config/routes.rb
services/api/config/secrets.yml [new file with mode: 0644]
services/api/lib/audit_logs.rb
services/api/lib/sweep_trashed_objects.rb
services/api/lib/tasks/manage_long_lived_tokens.rake [new file with mode: 0644]
services/api/lib/update_priority.rb
services/api/test/functional/arvados/v1/keep_services_controller_test.rb
services/api/test/functional/user_sessions_controller_test.rb
services/api/test/unit/api_client_test.rb
services/api/test/unit/arvados_model_test.rb
services/api/test/unit/log_test.rb
services/api/test/unit/node_test.rb
services/arv-git-httpd/gitolite_test.go
services/arv-web/README [deleted file]
services/arv-web/arv-web.py [deleted file]
services/arv-web/sample-cgi-app/docker_image [deleted file]
services/arv-web/sample-cgi-app/public/.htaccess [deleted file]
services/arv-web/sample-cgi-app/public/index.cgi [deleted file]
services/arv-web/sample-cgi-app/tmp/.keepkeep [deleted file]
services/arv-web/sample-rack-app/config.ru [deleted file]
services/arv-web/sample-rack-app/docker_image [deleted file]
services/arv-web/sample-rack-app/public/.keepkeep [deleted file]
services/arv-web/sample-rack-app/tmp/.keepkeep [deleted file]
services/arv-web/sample-static-page/docker_image [deleted file]
services/arv-web/sample-static-page/public/index.html [deleted file]
services/arv-web/sample-static-page/tmp/.keepkeep [deleted file]
services/arv-web/sample-wsgi-app/docker_image [deleted file]
services/arv-web/sample-wsgi-app/passenger_wsgi.py [deleted file]
services/arv-web/sample-wsgi-app/public/.keepkeep [deleted file]
services/arv-web/sample-wsgi-app/tmp/.keepkeep [deleted file]
services/keep-web/handler.go
services/keep-web/main.go
services/keep-web/s3.go [new file with mode: 0644]
services/keep-web/s3_test.go [new file with mode: 0644]
services/keep-web/server.go
services/keep-web/server_test.go
services/nodemanager/.gitignore [deleted symlink]
services/nodemanager/MANIFEST.in [deleted file]
services/nodemanager/README.rst [deleted file]
services/nodemanager/agpl-3.0.txt [deleted file]
services/nodemanager/arvados-node-manager.service [deleted file]
services/nodemanager/arvados_version.py [deleted file]
services/nodemanager/arvnodeman/__init__.py [deleted file]
services/nodemanager/arvnodeman/baseactor.py [deleted file]
services/nodemanager/arvnodeman/clientactor.py [deleted file]
services/nodemanager/arvnodeman/computenode/__init__.py [deleted file]
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py [deleted file]
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py [deleted file]
services/nodemanager/arvnodeman/computenode/dispatch/transitions.py [deleted file]
services/nodemanager/arvnodeman/computenode/driver/__init__.py [deleted file]
services/nodemanager/arvnodeman/computenode/driver/azure.py [deleted file]
services/nodemanager/arvnodeman/computenode/driver/dummy.py [deleted file]
services/nodemanager/arvnodeman/computenode/driver/ec2.py [deleted file]
services/nodemanager/arvnodeman/computenode/driver/gce.py [deleted file]
services/nodemanager/arvnodeman/config.py [deleted file]
services/nodemanager/arvnodeman/daemon.py [deleted file]
services/nodemanager/arvnodeman/jobqueue.py [deleted file]
services/nodemanager/arvnodeman/launcher.py [deleted file]
services/nodemanager/arvnodeman/nodelist.py [deleted file]
services/nodemanager/arvnodeman/status.py [deleted file]
services/nodemanager/arvnodeman/test/fake_driver.py [deleted file]
services/nodemanager/arvnodeman/timedcallback.py [deleted file]
services/nodemanager/bin/arvados-node-manager [deleted file]
services/nodemanager/doc/azure.example.cfg [deleted file]
services/nodemanager/doc/ec2.example.cfg [deleted file]
services/nodemanager/doc/gce.example.cfg [deleted file]
services/nodemanager/doc/local.example.cfg [deleted file]
services/nodemanager/fpm-info.sh [deleted file]
services/nodemanager/gittaggers.py [deleted symlink]
services/nodemanager/setup.py [deleted file]
services/nodemanager/tests/__init__.py [deleted file]
services/nodemanager/tests/fake_azure.cfg.template [deleted file]
services/nodemanager/tests/fake_ec2.cfg.template [deleted file]
services/nodemanager/tests/fake_gce.cfg.template [deleted file]
services/nodemanager/tests/integration_test.py [deleted file]
services/nodemanager/tests/stress_test.cwl [deleted file]
services/nodemanager/tests/test_arguments.py [deleted file]
services/nodemanager/tests/test_clientactor.py [deleted file]
services/nodemanager/tests/test_computenode.py [deleted file]
services/nodemanager/tests/test_computenode_dispatch.py [deleted file]
services/nodemanager/tests/test_computenode_dispatch_slurm.py [deleted file]
services/nodemanager/tests/test_computenode_driver.py [deleted file]
services/nodemanager/tests/test_computenode_driver_azure.py [deleted file]
services/nodemanager/tests/test_computenode_driver_ec2.py [deleted file]
services/nodemanager/tests/test_computenode_driver_gce.py [deleted file]
services/nodemanager/tests/test_config.py [deleted file]
services/nodemanager/tests/test_daemon.py [deleted file]
services/nodemanager/tests/test_failure.py [deleted file]
services/nodemanager/tests/test_jobqueue.py [deleted file]
services/nodemanager/tests/test_nodelist.py [deleted file]
services/nodemanager/tests/test_status.py [deleted file]
services/nodemanager/tests/test_timedcallback.py [deleted file]
services/nodemanager/tests/testutil.py [deleted file]
tools/arvbox/bin/arvbox
tools/compute-images/arvados-images-azure.json
tools/compute-images/build.sh
tools/crunchstat-summary/crunchstat_summary/summarizer.py

diff --git a/apps/workbench/Gemfile b/apps/workbench/Gemfile
index 24bfba383fc7065a5709293acb91943258a2842a..d5b416b5396f678b029bbaa20fb6bdba1e8a6bb2 100644 (file)
@@ -4,7 +4,7 @@
 
 source 'https://rubygems.org'
 
-gem 'rails', '~> 5.0.0'
+gem 'rails', '~> 5.2.0'
 gem 'arvados', git: 'https://github.com/arvados/arvados.git', glob: 'sdk/ruby/arvados.gemspec'
 
 gem 'activerecord-nulldb-adapter', git: 'https://github.com/arvados/nulldb'
@@ -14,6 +14,13 @@ gem 'sass'
 gem 'mime-types'
 gem 'responders', '~> 2.0'
 
+# Pin sprockets to < 4.0 to avoid issues when upgrading rails to 5.2
+# See: https://github.com/rails/sprockets-rails/issues/443
+gem 'sprockets', '~> 3.0'
+
+# Fast app boot times
+gem 'bootsnap', require: false
+
 # Note: keeping this out of the "group :assets" section "may" allow us
 # to use Coffescript for UJS responses. It also prevents a
 # warning/problem when running tests: "WARN: tilt autoloading
@@ -31,8 +38,14 @@ group :assets do
   gem 'therubyracer', :platforms => :ruby
 end
 
-group :development do
+group :development, :test, :performance do
   gem 'byebug'
+  # Pinning launchy because 2.5 requires ruby >= 2.4, which arvbox currently
+  # doesn't have because of SSO.
+  gem 'launchy', '~> 2.4.0'
+end
+
+group :development do
   gem 'ruby-debug-passenger'
   gem 'rack-mini-profiler', require: false
   gem 'flamegraph', require: false
@@ -48,7 +61,6 @@ group :test, :diagnostics, :performance do
 end
 
 group :test, :performance do
-  gem 'byebug'
   gem 'rails-perftest'
   gem 'ruby-prof'
   gem 'rvm-capistrano'
@@ -70,12 +82,6 @@ gem 'angularjs-rails', '~> 1.3.8'
 
 gem 'less'
 gem 'less-rails'
-
-# Wiselinks hasn't been updated for many years and it's using deprecated methods
-# Use our own Wiselinks fork until this PR is accepted:
-# https://github.com/igor-alexandrov/wiselinks/pull/116
-# gem 'wiselinks', git: 'https://github.com/arvados/wiselinks.git', branch: 'rails-5.1-compatibility'
-
 gem 'sshkey'
 
 # To use ActiveModel has_secure_password
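
A note on the pessimistic constraint used above: '~> 5.2.0' accepts any 5.2.x patch release and rejects 5.3. A minimal illustration with the standard RubyGems classes (not part of this commit):

    require 'rubygems'

    req = Gem::Requirement.new('~> 5.2.0')
    req.satisfied_by?(Gem::Version.new('5.2.4.3'))  # => true  (the version the lockfile below settles on)
    req.satisfied_by?(Gem::Version.new('5.3.0'))    # => false
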
diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock
index cb4e7ab9e334cb8fdb0ae72c20ee841f4fed02b2..e19172cb2ee54bba81f29e7f803f7b21f7b27f50 100644 (file)
@@ -30,39 +30,43 @@ GEM
   remote: https://rubygems.org/
   specs:
     RedCloth (4.3.2)
-    actioncable (5.0.7.2)
-      actionpack (= 5.0.7.2)
-      nio4r (>= 1.2, < 3.0)
-      websocket-driver (~> 0.6.1)
-    actionmailer (5.0.7.2)
-      actionpack (= 5.0.7.2)
-      actionview (= 5.0.7.2)
-      activejob (= 5.0.7.2)
+    actioncable (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      nio4r (~> 2.0)
+      websocket-driver (>= 0.6.1)
+    actionmailer (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      actionview (= 5.2.4.3)
+      activejob (= 5.2.4.3)
       mail (~> 2.5, >= 2.5.4)
       rails-dom-testing (~> 2.0)
-    actionpack (5.0.7.2)
-      actionview (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
-      rack (~> 2.0)
-      rack-test (~> 0.6.3)
+    actionpack (5.2.4.3)
+      actionview (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
+      rack (~> 2.0, >= 2.0.8)
+      rack-test (>= 0.6.3)
       rails-dom-testing (~> 2.0)
       rails-html-sanitizer (~> 1.0, >= 1.0.2)
-    actionview (5.0.7.2)
-      activesupport (= 5.0.7.2)
+    actionview (5.2.4.3)
+      activesupport (= 5.2.4.3)
       builder (~> 3.1)
-      erubis (~> 2.7.0)
+      erubi (~> 1.4)
       rails-dom-testing (~> 2.0)
       rails-html-sanitizer (~> 1.0, >= 1.0.3)
-    activejob (5.0.7.2)
-      activesupport (= 5.0.7.2)
+    activejob (5.2.4.3)
+      activesupport (= 5.2.4.3)
       globalid (>= 0.3.6)
-    activemodel (5.0.7.2)
-      activesupport (= 5.0.7.2)
-    activerecord (5.0.7.2)
-      activemodel (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
-      arel (~> 7.0)
-    activesupport (5.0.7.2)
+    activemodel (5.2.4.3)
+      activesupport (= 5.2.4.3)
+    activerecord (5.2.4.3)
+      activemodel (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
+      arel (>= 9.0)
+    activestorage (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      activerecord (= 5.2.4.3)
+      marcel (~> 0.3.1)
+    activesupport (5.2.4.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 0.7, < 2)
       minitest (~> 5.1)
@@ -71,9 +75,9 @@ GEM
       public_suffix (>= 2.0.2, < 5.0)
     andand (1.3.3)
     angularjs-rails (1.3.15)
-    arel (7.1.4)
-    arvados-google-api-client (0.8.7.3)
-      activesupport (>= 3.2, < 5.1)
+    arel (9.0.0)
+    arvados-google-api-client (0.8.7.4)
+      activesupport (>= 3.2, < 5.3)
       addressable (~> 2.3)
       autoparse (~> 0.3)
       extlib (~> 0.9)
@@ -89,6 +93,8 @@ GEM
       multi_json (>= 1.0.0)
     autoprefixer-rails (9.5.1.1)
       execjs
+    bootsnap (1.4.7)
+      msgpack (~> 1.0)
     bootstrap-sass (3.4.1)
       autoprefixer-rails (>= 5.2.1)
       sassc (>= 2.0.0)
@@ -96,7 +102,7 @@ GEM
       railties (>= 3.1)
     bootstrap-x-editable-rails (1.5.1.1)
       railties (>= 3.0)
-    builder (3.2.3)
+    builder (3.2.4)
     byebug (11.0.1)
     capistrano (2.15.9)
       highline
@@ -121,11 +127,11 @@ GEM
       execjs
     coffee-script-source (1.12.2)
     commonjs (0.2.7)
-    concurrent-ruby (1.1.5)
-    crass (1.0.5)
+    concurrent-ruby (1.1.6)
+    crass (1.0.6)
     deep_merge (1.2.1)
     docile (1.3.1)
-    erubis (2.7.0)
+    erubi (1.9.0)
     execjs (2.7.0)
     extlib (0.9.16)
     faraday (0.15.4)
@@ -167,25 +173,29 @@ GEM
       railties (>= 4)
       request_store (~> 1.0)
     logstash-event (1.2.02)
-    loofah (2.3.1)
+    loofah (2.6.0)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
     mail (2.7.1)
       mini_mime (>= 0.1.1)
+    marcel (0.3.3)
+      mimemagic (~> 0.3.2)
     memoist (0.16.2)
     metaclass (0.0.4)
-    method_source (0.9.2)
+    method_source (1.0.0)
     mime-types (3.2.2)
       mime-types-data (~> 3.2015)
     mime-types-data (3.2019.0331)
-    mini_mime (1.0.1)
+    mimemagic (0.3.5)
+    mini_mime (1.0.2)
     mini_portile2 (2.4.0)
     minitest (5.10.3)
     mocha (1.8.0)
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1.2)
       railties (> 3.1, < 6)
-    multi_json (1.14.1)
+    msgpack (1.3.3)
+    multi_json (1.15.0)
     multipart-post (2.1.1)
     net-scp (2.0.0)
       net-ssh (>= 2.6.5, < 6.0.0)
@@ -194,13 +204,13 @@ GEM
     net-ssh (5.2.0)
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
-    nio4r (2.3.1)
-    nokogiri (1.10.8)
+    nio4r (2.5.2)
+    nokogiri (1.10.10)
       mini_portile2 (~> 2.4.0)
     npm-rails (0.2.1)
       rails (>= 3.2)
     oj (3.7.12)
-    os (1.0.1)
+    os (1.1.1)
     passenger (6.0.2)
       rack
       rake (>= 0.8.1)
@@ -213,23 +223,24 @@ GEM
       cliver (~> 0.3.1)
       multi_json (~> 1.0)
       websocket-driver (>= 0.2.0)
-    public_suffix (4.0.3)
+    public_suffix (4.0.5)
     rack (2.2.3)
     rack-mini-profiler (1.0.2)
       rack (>= 1.2.0)
-    rack-test (0.6.3)
-      rack (>= 1.0)
-    rails (5.0.7.2)
-      actioncable (= 5.0.7.2)
-      actionmailer (= 5.0.7.2)
-      actionpack (= 5.0.7.2)
-      actionview (= 5.0.7.2)
-      activejob (= 5.0.7.2)
-      activemodel (= 5.0.7.2)
-      activerecord (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
+    rack-test (1.1.0)
+      rack (>= 1.0, < 3)
+    rails (5.2.4.3)
+      actioncable (= 5.2.4.3)
+      actionmailer (= 5.2.4.3)
+      actionpack (= 5.2.4.3)
+      actionview (= 5.2.4.3)
+      activejob (= 5.2.4.3)
+      activemodel (= 5.2.4.3)
+      activerecord (= 5.2.4.3)
+      activestorage (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
       bundler (>= 1.3.0)
-      railties (= 5.0.7.2)
+      railties (= 5.2.4.3)
       sprockets-rails (>= 2.0.0)
     rails-controller-testing (1.0.4)
       actionpack (>= 5.0.1.x)
@@ -238,15 +249,15 @@ GEM
     rails-dom-testing (2.0.3)
       activesupport (>= 4.2.0)
       nokogiri (>= 1.6)
-    rails-html-sanitizer (1.0.4)
-      loofah (~> 2.2, >= 2.2.2)
+    rails-html-sanitizer (1.3.0)
+      loofah (~> 2.3)
     rails-perftest (0.0.7)
-    railties (5.0.7.2)
-      actionpack (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
+    railties (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
       method_source
       rake (>= 0.8.7)
-      thor (>= 0.18.1, < 2.0)
+      thor (>= 0.19.0, < 2.0)
     rake (13.0.1)
     raphael-rails (2.1.2)
     rb-fsevent (0.10.3)
@@ -305,15 +316,15 @@ GEM
     therubyracer (0.12.3)
       libv8 (~> 3.16.14.15)
       ref
-    thor (0.20.3)
+    thor (1.0.1)
     thread_safe (0.3.6)
     tilt (2.0.9)
-    tzinfo (1.2.6)
+    tzinfo (1.2.7)
       thread_safe (~> 0.1)
     uglifier (2.7.2)
       execjs (>= 0.3.0)
       json (>= 1.8.0)
-    websocket-driver (0.6.5)
+    websocket-driver (0.7.3)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.5)
     xpath (2.1.0)
@@ -328,6 +339,7 @@ DEPENDENCIES
   andand
   angularjs-rails (~> 1.3.8)
   arvados!
+  bootsnap
   bootstrap-sass (~> 3.4.1)
   bootstrap-tab-history-rails
   bootstrap-x-editable-rails
@@ -339,6 +351,7 @@ DEPENDENCIES
   headless (~> 1.0.2)
   httpclient (~> 2.5)
   jquery-rails
+  launchy (~> 2.4.0)
   less
   less-rails
   lograge
@@ -354,7 +367,7 @@ DEPENDENCIES
   piwik_analytics
   poltergeist (~> 1.5.1)
   rack-mini-profiler
-  rails (~> 5.0.0)
+  rails (~> 5.2.0)
   rails-controller-testing
   rails-perftest
   raphael-rails
@@ -369,10 +382,11 @@ DEPENDENCIES
   signet (< 0.12)
   simplecov (~> 0.7)
   simplecov-rcov
+  sprockets (~> 3.0)
   sshkey
   themes_for_rails!
   therubyracer
   uglifier (~> 2.0)
 
 BUNDLED WITH
-   1.16.6
+   1.17.3
diff --git a/apps/workbench/app/controllers/application_controller.rb b/apps/workbench/app/controllers/application_controller.rb
index 8d6f897bb69b1770054d15337cb28cb6bf507876..77ec68bdb06eeb5d9bf124206528e62c491c547e 100644 (file)
@@ -29,7 +29,6 @@ class ApplicationController < ActionController::Base
   begin
     rescue_from(ActiveRecord::RecordNotFound,
                 ActionController::RoutingError,
-                ActionController::UnknownController,
                 AbstractController::ActionNotFound,
                 with: :render_not_found)
     rescue_from(Exception,
diff --git a/apps/workbench/app/models/application_record.rb b/apps/workbench/app/models/application_record.rb
deleted file mode 100644 (file)
index 759034d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-class ApplicationRecord < ActiveRecord::Base
-  self.abstract_class = true
-end
\ No newline at end of file
diff --git a/apps/workbench/app/models/arvados_base.rb b/apps/workbench/app/models/arvados_base.rb
index b9162c2aec364bd03a34171c9981262304bc9d06..c5e1a4ed2240075691fd6e03827c746be950f3aa 100644 (file)
@@ -106,6 +106,12 @@ class ArvadosBase
     end
   end
 
+  # The ActiveModel::Dirty API was changed on Rails 5.2
+  # See: https://github.com/rails/rails/commit/c3675f50d2e59b7fc173d7b332860c4b1a24a726#diff-aaddd42c7feb0834b1b5c66af69814d3
+  def mutations_from_database
+    @mutations_from_database ||= ActiveModel::NullMutationTracker.instance
+  end
+
   def self.columns
     @discovered_columns = [] if !defined?(@discovered_columns)
     return @discovered_columns if @discovered_columns.andand.any?
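
For context on the shim above: Rails 5.2 moved ActiveModel::Dirty onto "mutation tracker" objects, and code participating in that protocol is expected to expose mutations_from_database. ArvadosBase models are backed by API calls rather than a database table, so the null tracker, which always reports "no changes", is the appropriate answer. A minimal sketch of the same pattern, assuming only the activemodel gem and an illustrative class name:

    require 'active_model'

    class ApiBackedRecord
      # Same shim as the ArvadosBase patch: there is no database, so never report mutations.
      def mutations_from_database
        @mutations_from_database ||= ActiveModel::NullMutationTracker.instance
      end
    end

    # The null tracker answers the Dirty protocol with "nothing changed":
    ApiBackedRecord.new.mutations_from_database.any_changes?  # => false under the 5.2 tracker API
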
diff --git a/apps/workbench/bin/bundle b/apps/workbench/bin/bundle
index 9447ba861219ee65467e1df44115bd4ceb87bf63..cb10307acd6cb385375417f47172eff01990dde4 100755 (executable)
@@ -3,5 +3,5 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
 load Gem.bin_path('bundler', 'bundle')
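
This binstub change, repeated in bin/setup and bin/update below, swaps the old __FILE__ path arithmetic for the flatter __dir__ idiom (available since Ruby 2.0). Both forms resolve to the same file:

    File.expand_path('../../Gemfile', __FILE__)  # old: __FILE__ itself counts as one path component
    File.expand_path('../Gemfile', __dir__)      # new: one level up from this file's directory
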
diff --git a/apps/workbench/bin/setup b/apps/workbench/bin/setup
index 50c3fa0548ccb97521e6ca22885209f81257ea24..7aed0fb2826fc94553b99f87db10cf379daad69d 100755 (executable)
@@ -3,12 +3,11 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-require 'pathname'
 require 'fileutils'
 include FileUtils
 
 # path to your application root.
-APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
+APP_ROOT = File.expand_path('..', __dir__)
 
 def system!(*args)
   system(*args) || abort("\n== Command #{args} failed ==")
@@ -22,6 +21,9 @@ chdir APP_ROOT do
   system! 'gem install bundler --conservative'
   system('bundle check') || system!('bundle install')
 
+  # Install JavaScript dependencies if using Yarn
+  # system('bin/yarn')
+
   # puts "\n== Copying sample files =="
   # unless File.exist?('config/database.yml')
   #   cp 'config/database.yml.sample', 'config/database.yml'
diff --git a/apps/workbench/bin/update b/apps/workbench/bin/update
index b56771ece80ef11fc0e444889ecfe3d4fb23517a..46aa76ca87a921a313af9d3756a13f56629d92ab 100755 (executable)
@@ -22,6 +22,9 @@ chdir APP_ROOT do
   system! 'gem install bundler --conservative'
   system('bundle check') || system!('bundle install')
 
+  # Install JavaScript dependencies if using Yarn
+  # system('bin/yarn')
+
   puts "\n== Updating database =="
   system! 'bin/rails db:migrate'
 
diff --git a/apps/workbench/bin/yarn b/apps/workbench/bin/yarn
new file mode 100755 (executable)
index 0000000..5fc7611
--- /dev/null
@@ -0,0 +1,15 @@
+#!/usr/bin/env ruby
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+APP_ROOT = File.expand_path('..', __dir__)
+Dir.chdir(APP_ROOT) do
+  begin
+    exec "yarnpkg #{ARGV.join(" ")}"
+  rescue Errno::ENOENT
+    $stderr.puts "Yarn executable was not detected in the system."
+    $stderr.puts "Download Yarn at https://yarnpkg.com/en/docs/install"
+    exit 1
+  end
+end
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 9456e61455c306cb7b19db7963366c34a55b1345..255ad44f852f4b567005eae0c81a8f445dfbea8b 100644 (file)
@@ -77,7 +77,6 @@ test:
   action_mailer.delivery_method: :test
   active_support.deprecation: :stderr
   profiling_enabled: true
-  secret_token: <%= rand(2**256).to_s(36) %>
   secret_key_base: <%= rand(2**256).to_s(36) %>
   site_name: Workbench:test
 
diff --git a/apps/workbench/config/application.rb b/apps/workbench/config/application.rb
index e88229b85158f200ebc6a7df644f9b147fcfd06f..42bf4da24bbf71900d403686cc954badd57660e0 100644 (file)
@@ -2,13 +2,15 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-require File.expand_path('../boot', __FILE__)
+require_relative 'boot'
 
 require "rails"
 # Pick only the frameworks we need:
 require "active_model/railtie"
 require "active_job/railtie"
 require "active_record/railtie"
+# Skip ActiveStorage (new in Rails 5.1)
+# require "active_storage/engine"
 require "action_controller/railtie"
 require "action_mailer/railtie"
 require "action_view/railtie"
@@ -28,6 +30,9 @@ module ArvadosWorkbench
 
     require_relative "arvados_config.rb"
 
+    # Initialize configuration defaults for originally generated Rails version.
+    config.load_defaults 5.1
+
     # Settings in config/environments/* take precedence over those specified here.
     # Application configuration should go into files in config/initializers
     # -- all .rb files in that directory are automatically loaded.
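
config.load_defaults 5.1 keeps the application on the 5.1 behavior set even though the framework is now 5.2; the new_framework_defaults_5_2.rb initializer added below then allows each 5.2 default to be switched on individually. A sketch of that standard upgrade pattern (hypothetical application name; this mirrors the Rails upgrade guide rather than anything Arvados-specific):

    module ExampleApp
      class Application < Rails::Application
        # Pin behavior to the last version the app is known to work with...
        config.load_defaults 5.1
        # ...then opt in to newer defaults one at a time, for example:
        config.active_record.cache_versioning = true
      end
    end
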
diff --git a/apps/workbench/config/boot.rb b/apps/workbench/config/boot.rb
index 8153266683f6161a8666f74843ce6810d093ffc0..6add5911f6238f87ff72b91fef710fc05d9b67ba 100644 (file)
@@ -8,6 +8,7 @@ require 'rubygems'
 ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
 
 require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE'])
+require 'bootsnap/setup' # Speed up boot time by caching expensive operations.
 
 # Use ARVADOS_API_TOKEN environment variable (if set) in console
 require 'rails'
diff --git a/apps/workbench/config/initializers/content_security_policy.rb b/apps/workbench/config/initializers/content_security_policy.rb
new file mode 100644 (file)
index 0000000..853ecde
--- /dev/null
@@ -0,0 +1,29 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+
+# Define an application-wide content security policy
+# For further information see the following documentation
+# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy
+
+# Rails.application.config.content_security_policy do |policy|
+#   policy.default_src :self, :https
+#   policy.font_src    :self, :https, :data
+#   policy.img_src     :self, :https, :data
+#   policy.object_src  :none
+#   policy.script_src  :self, :https
+#   policy.style_src   :self, :https
+
+#   # Specify URI for violation reports
+#   # policy.report_uri "/csp-violation-report-endpoint"
+# end
+
+# If you are using UJS then enable automatic nonce generation
+# Rails.application.config.content_security_policy_nonce_generator = -> request { SecureRandom.base64(16) }
+
+# Report CSP violations to a specified URI
+# For further information see the following documentation:
+# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only
+# Rails.application.config.content_security_policy_report_only = true
diff --git a/apps/workbench/config/initializers/new_framework_defaults.rb b/apps/workbench/config/initializers/new_framework_defaults.rb
index b8dca33a373171cceda0b9d9f3f2d5f6f1dc68e9..2e2f0b1810df54f5a5a4b15e6a07c5be98e78f67 100644 (file)
@@ -24,6 +24,3 @@ ActiveSupport.to_time_preserves_timezone = false
 
 # Require `belongs_to` associations by default. Previous versions had false.
 Rails.application.config.active_record.belongs_to_required_by_default = false
-
-# Do not halt callback chains when a callback returns false. Previous versions had true.
-ActiveSupport.halt_callback_chains_on_return_false = true
diff --git a/apps/workbench/config/initializers/new_framework_defaults_5_1.rb b/apps/workbench/config/initializers/new_framework_defaults_5_1.rb
new file mode 100644 (file)
index 0000000..804ee6f
--- /dev/null
@@ -0,0 +1,18 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+#
+# This file contains migration options to ease your Rails 5.1 upgrade.
+#
+# Once upgraded flip defaults one by one to migrate to the new default.
+#
+# Read the Guide for Upgrading Ruby on Rails for more info on each option.
+
+# Make `form_with` generate non-remote forms.
+Rails.application.config.action_view.form_with_generates_remote_forms = false
+
+# Unknown asset fallback will return the path passed in when the given
+# asset is not present in the asset pipeline.
+# Rails.application.config.assets.unknown_asset_fallback = false
diff --git a/apps/workbench/config/initializers/new_framework_defaults_5_2.rb b/apps/workbench/config/initializers/new_framework_defaults_5_2.rb
new file mode 100644 (file)
index 0000000..93a8d52
--- /dev/null
@@ -0,0 +1,42 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+#
+# This file contains migration options to ease your Rails 5.2 upgrade.
+#
+# Once upgraded flip defaults one by one to migrate to the new default.
+#
+# Read the Guide for Upgrading Ruby on Rails for more info on each option.
+
+# Make Active Record use stable #cache_key alongside new #cache_version method.
+# This is needed for recyclable cache keys.
+# Rails.application.config.active_record.cache_versioning = true
+
+# Use AES-256-GCM authenticated encryption for encrypted cookies.
+# Also, embed cookie expiry in signed or encrypted cookies for increased security.
+#
+# This option is not backwards compatible with earlier Rails versions.
+# It's best enabled when your entire app is migrated and stable on 5.2.
+#
+# Existing cookies will be converted on read then written with the new scheme.
+# Rails.application.config.action_dispatch.use_authenticated_cookie_encryption = true
+
+# Use AES-256-GCM authenticated encryption as default cipher for encrypting messages
+# instead of AES-256-CBC, when use_authenticated_message_encryption is set to true.
+# Rails.application.config.active_support.use_authenticated_message_encryption = true
+
+# Add default protection from forgery to ActionController::Base instead of in
+# ApplicationController.
+# Rails.application.config.action_controller.default_protect_from_forgery = true
+
+# Store boolean values in sqlite3 databases as 1 and 0 instead of 't' and
+# 'f' after migrating old data.
+# Rails.application.config.active_record.sqlite3.represent_boolean_as_integer = true
+
+# Use SHA-1 instead of MD5 to generate non-sensitive digests, such as the ETag header.
+# Rails.application.config.active_support.use_sha1_digests = true
+
+# Make `form_with` generate id attributes for any generated HTML tags.
+# Rails.application.config.action_view.form_with_generates_ids = true
diff --git a/apps/workbench/config/routes.rb b/apps/workbench/config/routes.rb
index 718adfd2ed0583a99f8eebb221b5eae0c7d012c3..ffc09ac933acf8d88fa1b07cac460c144a805a45 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-ArvadosWorkbench::Application.routes.draw do
+Rails.application.routes.draw do
   themes_for_rails
 
   resources :keep_disks
diff --git a/apps/workbench/config/secrets.yml b/apps/workbench/config/secrets.yml
index bc8a0d0de5f6852eff8e2c0092909b4f16660ae4..57399082e8a342cfcfaecf2637bafdb9727cd136 100644 (file)
 # no regular words or you'll be exposed to dictionary attacks.
 # You can use `rails secret` to generate a secure secret key.
 
-# Make sure the secrets in this file are kept private
-# if you're sharing your code publicly.
+# NOTE that these get overridden by Arvados' own configuration system.
 
-development:
-  secret_key_base: 33e2d171ec6c67cf8e9a9fbfadc1071328bdab761297e2fe28b9db7613dd542c1ba3bdb3bd3e636d1d6f74ab73a2d90c4e9c0ecc14fde8ccd153045f94e9cc41
+development:
+#   secret_key_base: <%= rand(1<<255).to_s(36) %>
 
-test:
-  secret_key_base: d4c07cab3530fccf5d86565ecdc359eb2a853b8ede3b06edb2885e4423d7a726f50a3e415bb940fd4861e8fec16459665fd377acc8cdd98ea63294d2e0d12bb2
+test:
+#   secret_key_base: <%= rand(1<<255).to_s(36) %>
 
-# Do not keep production secrets in the repository,
-# instead read values from the environment.
+# In case this doesn't get overridden for some reason, assign a random key
+# to gracefully degrade by rejecting cookies instead of by opening a
+# vulnerability.
 production:
-  secret_key_base: <%= ENV["SECRET_KEY_BASE"] %>
+  secret_key_base: <%= rand(1<<255).to_s(36) %>
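
The production fallback above evaluates rand(1 << 255), a random integer below 2**255, and renders it in base 36, so a fresh key is generated whenever the secrets file is loaded. A sketch of the arithmetic in plain Ruby:

    token = rand(1 << 255).to_s(36)
    # log(2**255) / log(36) is roughly 49.3, so the key is
    # usually 49 or 50 base-36 characters long.
    token.length
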
diff --git a/build/check-copyright-notices b/build/check-copyright-notices
index ba08f34bcd46ebffd64adb7d387714f4b97d189b..857a9c8ebca0787801da8260d90bd0f0bcfaada2 100755 (executable)
@@ -86,15 +86,12 @@ do
             | *.py \
             | sdk/python/bin/arv-* \
             | sdk/cwl/bin/* \
-            | services/nodemanager/bin/* \
             | services/fuse/bin/* \
             | tools/crunchstat-summary/bin/* \
             | crunch_scripts/* \
             | *.yaml | *.yml | *.yml.example | *.cwl \
             | *.sh | *.service \
             | */run | */run-service | */restart-dns-server \
-            | */nodemanager/doc/*.cfg \
-            | */nodemanager/tests/fake*.cfg.template \
             | */nginx.conf \
             | build/build.list | *.R)
             fixer=fixer
diff --git a/build/package-build-dockerfiles/centos7/Dockerfile b/build/package-build-dockerfiles/centos7/Dockerfile
index 8ccab49e1e7d3d9e7c557c48758b8b146386db35..5d204464cff89c27b0e21158fb42bbb77adc12cc 100644 (file)
@@ -6,7 +6,7 @@ FROM centos:7
 MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
 
 # Install dependencies.
-RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel python-devel python-setuptools fuse-devel xz-libs git python-virtualenv wget pam-devel
+RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel fuse-devel xz-libs git wget pam-devel
 
 # Install RVM
 ADD generated/mpapis.asc /tmp/
@@ -41,16 +41,16 @@ RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 # Need to "touch" RPM database to workaround bug in interaction between
 # overlayfs and yum (https://bugzilla.redhat.com/show_bug.cgi?id=1213602)
 RUN touch /var/lib/rpm/* && yum -q -y install rh-python36
-RUN scl enable rh-python36 "easy_install-3.6 pip" && easy_install-2.7 pip
+RUN scl enable rh-python36 "easy_install-3.6 pip"
 
 # Add epel, we need it for the python-pam dependency
-RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
-RUN rpm -ivh epel-release-latest-7.noarch.rpm
+#RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+#RUN rpm -ivh epel-release-latest-7.noarch.rpm
 
 RUN git clone --depth 1 git://git.arvados.org/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
 
 # The version of setuptools that comes with CentOS is way too old
-RUN pip install --upgrade 'setuptools<45'
+RUN scl enable rh-python36 "easy_install-3.6 pip install 'setuptools<45'"
 
 ENV WORKSPACE /arvados
 CMD ["scl", "enable", "rh-python36", "/usr/local/rvm/bin/rvm-exec default bash /jenkins/run-build-packages.sh --target centos7"]
diff --git a/build/package-build-dockerfiles/debian10/Dockerfile b/build/package-build-dockerfiles/debian10/Dockerfile
index 90dfd36b52f66afb6f49c946df761fcd1651ac53..4f306c6aa4e8ca4241e39f87fcbf403b401ab431 100644 (file)
@@ -4,15 +4,15 @@
 
 ## dont use debian:10 here since the word 'buster' is used for rvm precompiled binaries
 FROM debian:buster
-MAINTAINER Ward Vandewege <wvandewege@veritasgenetics.com>
+MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
 
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip python3-venv python3-dev libpam-dev
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev unzip python3-venv python3-dev libpam-dev
 
 # Install virtualenv
-RUN /usr/bin/pip install 'virtualenv<20'
+RUN /usr/bin/pip3 install 'virtualenv<20'
 
 # Install RVM
 ADD generated/mpapis.asc /tmp/
diff --git a/build/package-build-dockerfiles/debian9/Dockerfile b/build/package-build-dockerfiles/debian9/Dockerfile
index 1a84da280898d3010ea6c8bf5978bc0da648f891..5294997f054658d5f3fb5b7366af0d69eab663a8 100644 (file)
@@ -4,15 +4,15 @@
 
 ## dont use debian:9 here since the word 'stretch' is used for rvm precompiled binaries
 FROM debian:stretch
-MAINTAINER Nico Cesar <nico@curoverse.com>
+MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
 
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip python3-venv python3-dev libpam-dev
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev unzip python3-venv python3-dev libpam-dev
 
 # Install virtualenv
-RUN /usr/bin/pip install 'virtualenv<20'
+RUN /usr/bin/pip3 install 'virtualenv<20'
 
 # Install RVM
 ADD generated/mpapis.asc /tmp/
diff --git a/build/package-build-dockerfiles/ubuntu1604/Dockerfile b/build/package-build-dockerfiles/ubuntu1604/Dockerfile
index 87f7712d50be68aceb65612b33154bc267b0a10c..202bab651322dd9d91cd8ea415a7146b5931f9ce 100644 (file)
@@ -8,10 +8,10 @@ MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev libgnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip tzdata python3-venv python3-dev libpam-dev
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev libgnutls-dev curl git libattr1-dev libfuse-dev libpq-dev unzip tzdata python3-venv python3-dev libpam-dev
 
 # Install virtualenv
-RUN /usr/bin/pip install 'virtualenv<20'
+RUN /usr/bin/pip3 install 'virtualenv<20'
 
 # Install RVM
 ADD generated/mpapis.asc /tmp/
diff --git a/build/package-build-dockerfiles/ubuntu1804/Dockerfile b/build/package-build-dockerfiles/ubuntu1804/Dockerfile
index a2ec29da1cf3932134b3f524608fbcb0c0b72691..05023aa09af50e5384e69db80ed5b253c91d72bb 100644 (file)
@@ -8,10 +8,10 @@ MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-pip libcurl4-gnutls-dev libgnutls28-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip tzdata python3-venv python3-dev libpam-dev
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python3-pip libcurl4-gnutls-dev libgnutls28-dev curl git libattr1-dev libfuse-dev libpq-dev unzip tzdata python3-venv python3-dev libpam-dev
 
 # Install virtualenv
-RUN /usr/bin/pip install 'virtualenv<20'
+RUN /usr/bin/pip3 install 'virtualenv<20'
 
 # Install RVM
 ADD generated/mpapis.asc /tmp/
diff --git a/build/package-testing/test-package-arvados-docker-cleaner.sh b/build/package-testing/test-package-arvados-docker-cleaner.sh
new file mode 100755 (executable)
index 0000000..6b344de
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+arvados-docker-cleaner -h >/dev/null
diff --git a/build/package-testing/test-package-arvados-node-manager.sh b/build/package-testing/test-package-arvados-node-manager.sh
deleted file mode 100755 (executable)
index 9300f4c..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-set -e
-
-arvados-node-manager --version
-
-exec /usr/share/python2.7/dist/arvados-node-manager/bin/python2.7 <<EOF
-import libcloud.compute.types
-import libcloud.compute.providers
-libcloud.compute.providers.get_driver(libcloud.compute.types.Provider.AZURE_ARM)
-print "Successfully imported compatible libcloud library"
-EOF
diff --git a/build/package-testing/test-package-python-arvados-cwl-runner.sh b/build/package-testing/test-package-python-arvados-cwl-runner.sh
deleted file mode 120000 (symlink)
index 61e61b1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test-package-python27-python-arvados-cwl-runner.sh
\ No newline at end of file
diff --git a/build/package-testing/test-package-python-arvados-fuse.sh b/build/package-testing/test-package-python-arvados-fuse.sh
deleted file mode 120000 (symlink)
index 3b9232c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test-package-python27-python-arvados-fuse.sh
\ No newline at end of file
diff --git a/build/package-testing/test-package-python-arvados-python-client.sh b/build/package-testing/test-package-python-arvados-python-client.sh
deleted file mode 120000 (symlink)
index 8a4d0ea..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test-package-python27-python-arvados-python-client.sh
\ No newline at end of file
diff --git a/build/package-testing/test-package-python-cwltest.sh b/build/package-testing/test-package-python-cwltest.sh
deleted file mode 120000 (symlink)
index 9b6545b..0000000
+++ /dev/null
@@ -1 +0,0 @@
-test-package-python27-python-cwltest.sh
\ No newline at end of file
diff --git a/build/package-testing/test-package-python27-python-arvados-python-client.sh b/build/package-testing/test-package-python27-python-arvados-python-client.sh
deleted file mode 100755 (executable)
index 2c92a3e..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-set -e
-
-arv-put --version
-
-/usr/share/python2.7/dist/python-arvados-python-client/bin/python2.7 << EOF
-import arvados
-print "Successfully imported arvados"
-EOF
diff --git a/build/package-testing/test-package-python3-arvados-cwl-runner.sh b/build/package-testing/test-package-python3-arvados-cwl-runner.sh
index 99327c016ad618dbf69971a0960e19def60469e9..ebf7b5becda9d94e371c462ce7bc5de278c82c1b 100755 (executable)
@@ -5,4 +5,4 @@
 
 set -e
 
-arvados-cwl-runner --version
+arvados-cwl-runner --version >/dev/null
diff --git a/build/package-testing/test-package-python3-arvados-python-client.sh b/build/package-testing/test-package-python3-arvados-python-client.sh
index d4e66a27b9510ca06b50c2704c8e2bdee70a17d9..69f728c10e5c335967fac801c9f131726bce18a6 100755 (executable)
@@ -5,7 +5,7 @@
 
 set -e
 
-arv-put --version
+arv-put --version >/dev/null
 
 /usr/share/python3/dist/python3-arvados-python-client/bin/python3 << EOF
 import arvados
diff --git a/services/nodemanager/arvnodeman/test/__init__.py b/build/package-testing/test-package-python3-crunchstat-summary.sh
old mode 100644 (file)
new mode 100755 (executable)
similarity index 66%
rename from services/nodemanager/arvnodeman/test/__init__.py
rename to build/package-testing/test-package-python3-crunchstat-summary.sh
index d3ac1c2..02b6e0d
@@ -1,5 +1,8 @@
+#!/bin/sh
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+set -e
 
+crunchstat-summary -h >/dev/null
diff --git a/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh b/build/package-testing/test-package-python3-cwltest.sh
similarity index 79%
rename from build/package-testing/test-package-python27-python-arvados-cwl-runner.sh
rename to build/package-testing/test-package-python3-cwltest.sh
index 99327c016ad618dbf69971a0960e19def60469e9..77f1f44016d80bfe3e19c33cde150e8da65e1778 100755 (executable)
@@ -5,4 +5,4 @@
 
 set -e
 
-arvados-cwl-runner --version
+cwltest -h >/dev/null
diff --git a/build/package-testing/test-package-python3-python-arvados-fuse.sh b/build/package-testing/test-package-python3-python-arvados-fuse.sh
deleted file mode 120000 (symlink)
index 3b9232c5fa6ccac4a9f1fdaf3e8b1703934959ed..0000000000000000000000000000000000000000
+++ /dev/null
@@ -1 +0,0 @@
-test-package-python27-python-arvados-fuse.sh
\ No newline at end of file
diff --git a/build/package-testing/test-package-python3-python-arvados-fuse.sh b/build/package-testing/test-package-python3-python-arvados-fuse.sh
new file mode 100755 (executable)
index 0000000000000000000000000000000000000000..81929857b8eaa6791a3e47e196f578de6f17b9a0
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+arv-mount --version
diff --git a/build/package-testing/test-package-rh-python36-python-arvados-cwl-runner.sh b/build/package-testing/test-package-rh-python36-python-arvados-cwl-runner.sh
new file mode 100755 (executable)
index 0000000..ebf7b5b
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+arvados-cwl-runner --version >/dev/null
diff --git a/build/package-testing/test-package-rh-python36-python-crunchstat-summary.sh b/build/package-testing/test-package-rh-python36-python-crunchstat-summary.sh
new file mode 100755 (executable)
index 0000000..02b6e0d
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+crunchstat-summary -h >/dev/null
diff --git a/build/package-testing/test-package-python27-python-cwltest.sh b/build/package-testing/test-package-rh-python36-python-cwltest.sh
similarity index 74%
rename from build/package-testing/test-package-python27-python-cwltest.sh
rename to build/package-testing/test-package-rh-python36-python-cwltest.sh
index 395cefc5138ceba7647ad35995c1c8860466e424..77f1f44016d80bfe3e19c33cde150e8da65e1778 100755 (executable)
@@ -3,6 +3,6 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-exec python <<EOF
-import cwltest
-EOF
+set -e
+
+cwltest -h >/dev/null
diff --git a/build/rails-package-scripts/README.md b/build/rails-package-scripts/README.md
index 0d720bde147d58cf26600ff784b7a34f92effaea..2930957b942affd86326248e6e0e4a3efb3166f9 100644 (file)
@@ -12,7 +12,7 @@ Since our build process is a tower of shell scripts, concatenating files seemed
 
 postinst.sh lets the early parts define a few hooks to control behavior:
 
-* After it installs the core configuration files (database.yml, application.yml, and production.rb) to /etc/arvados/server, it calls setup_extra_conffiles.  By default this is a noop function (in step2.sh).  API server defines this to set up the old omniauth.rb conffile.
+* After it installs the core configuration files (database.yml, application.yml, and production.rb) to /etc/arvados/server, it calls setup_extra_conffiles.  By default this is a noop function (in step2.sh).
 * Before it restarts nginx, it calls setup_before_nginx_restart.  By default this is a noop function (in step2.sh).  API server defines this to set up the internal git repository, if necessary.
 * $RAILSPKG_DATABASE_LOAD_TASK defines the Rake task to load the database.  API server uses db:structure:load.  SSO server uses db:schema:load.  Workbench doesn't set this, which causes the postinst to skip all database work.
 * If $RAILSPKG_SUPPORTS_CONFIG_CHECK != 1, it won't run the config:check rake task.  SSO clears this flag (it doesn't have that task code).
diff --git a/build/rails-package-scripts/arvados-api-server.sh b/build/rails-package-scripts/arvados-api-server.sh
index 82bc9898aa87c350b38774db6db349294330bc9f..027383ab4f62294aa9661b5e9ce1651457f3bf1a 100644 (file)
@@ -12,7 +12,9 @@ DOC_URL="http://doc.arvados.org/install/install-api-server.html#configure"
 
 RAILSPKG_DATABASE_LOAD_TASK=db:structure:load
 setup_extra_conffiles() {
-    setup_conffile initializers/omniauth.rb
+  # Rails 5.2 does not tolerate dangling symlinks in the initializers directory, and this one
+  # can still be there, left over from a previous version of the API server package.
+  rm -f $RELEASE_PATH/config/initializers/omniauth.rb
 }
 
 setup_before_nginx_restart() {
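
Why the dangling symlink has to be removed, sketched under the assumption of standard Rails initializer loading (this is not code from the commit): every file matching config/initializers/*.rb is loaded at boot, and a symlink whose target has been deleted still matches the glob, so the load step fails.

    Dir.glob('config/initializers/*.rb').sort.each do |initializer|
      load initializer  # raises LoadError when `initializer` is a dangling symlink
    end
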
diff --git a/build/rails-package-scripts/prerm.sh b/build/rails-package-scripts/prerm.sh
index 9816b14c6d5a975a1aafd51f7b47033196efd38c..6773b6f308080314fca9e6e4b7703567ab14a12a 100644 (file)
@@ -10,7 +10,6 @@ remove () {
   rm -f $RELEASE_PATH/config/environments/production.rb
   rm -f $RELEASE_PATH/config/application.yml
   # Old API server configuration file.
-  rm -f $RELEASE_PATH/config/initializers/omniauth.rb
   rm -rf $RELEASE_PATH/public/assets/
   rm -rf $RELEASE_PATH/tmp
   rm -rf $RELEASE_PATH/.bundle
diff --git a/build/run-build-docker-jobs-image.sh b/build/run-build-docker-jobs-image.sh
index ec8357701d067fe0b17bdc2df01f17a1bf4f948e..d1fb2ac67054dfdc31ce8a31401747c3a55aefbf 100755 (executable)
@@ -185,28 +185,23 @@ if docker --version |grep " 1\.[0-9]\." ; then
     FORCE=-f
 fi
 
-#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig
-
 if ! [[ -z "$version_tag" ]]; then
     docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
-else
-    docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:latest
-fi
+    ECODE=$?
 
-ECODE=$?
+    if [[ "$ECODE" != "0" ]]; then
+        EXITCODE=$(($EXITCODE + $ECODE))
+    fi
 
-if [[ "$ECODE" != "0" ]]; then
-    EXITCODE=$(($EXITCODE + $ECODE))
+    checkexit $ECODE "docker tag"
+    title "docker tag complete (`timer`)"
 fi
 
-checkexit $ECODE "docker tag"
-title "docker tag complete (`timer`)"
-
 title "uploading images"
 
 timer_reset
 
-if [[ "$ECODE" != "0" ]]; then
+if [[ "$EXITCODE" != "0" ]]; then
     title "upload arvados images SKIPPED because build or tag failed"
 else
     if [[ $upload == true ]]; then
@@ -217,7 +212,6 @@ else
             docker_push arvados/jobs:"$version_tag"
         else
            docker_push arvados/jobs:$cwl_runner_version_orig
-           docker_push arvados/jobs:latest
         fi
         title "upload arvados images finished (`timer`)"
     else
diff --git a/build/run-build-packages-one-target.sh b/build/run-build-packages-one-target.sh
index f8816dbe4873c3fad3773d47590393d1e62b5550..d0a79ad3dfa2fdf04cab380f321602fac66df618 100755 (executable)
@@ -192,27 +192,47 @@ popd
 if test -z "$packages" ; then
     packages="arvados-api-server
         arvados-client
+        arvados-controller
+        arvados-dispatch-cloud
         arvados-docker-cleaner
         arvados-git-httpd
-        arvados-node-manager
+        arvados-health
+        arvados-server
         arvados-src
+        arvados-sync-groups
         arvados-workbench
+        arvados-workbench2
+        arvados-ws
         crunch-dispatch-local
         crunch-dispatch-slurm
         crunch-run
         crunchstat
+        keepproxy
+        keepstore
         keep-balance
         keep-block-check
-        keepproxy
         keep-rsync
-        keepstore
+        keep-exercise
+        keep-rsync
+        keep-block-check
         keep-web
         libarvados-perl
-        libpam-arvados
-        libpam-arvados-go
-        python-arvados-fuse
-        python-arvados-python-client
-        python-arvados-cwl-runner"
+        libpam-arvados-go"
+    if [[ "$TARGET" =~ "centos" ]]; then
+      packages="$packages
+        rh-python36-python-cwltest
+        rh-python36-python-arvados-fuse
+        rh-python36-python-arvados-python-client
+        rh-python36-python-arvados-cwl-runner
+        rh-python36-python-crunchstat-summary"
+    else
+      packages="$packages
+        python3-cwltest
+        python3-arvados-fuse
+        python3-arvados-python-client
+        python3-arvados-cwl-runner
+        python3-crunchstat-summary"
+    fi
 fi
 
 FINAL_EXITCODE=0
diff --git a/build/run-build-packages-python-and-ruby.sh b/build/run-build-packages-python-and-ruby.sh
index ba44218c4e8f076a8ab7d0a8917b5cd40cecb547..f3b7564d714f41492c8ff55933707a98c99086fb 100755 (executable)
@@ -192,10 +192,8 @@ PYTHON_BUILD_FAILURES=0
 if [ $PYTHON -eq 1 ]; then
   debug_echo "Building Python packages"
   python_wrapper arvados-python-client "$WORKSPACE/sdk/python"
-  python_wrapper arvados-pam "$WORKSPACE/sdk/pam"
   python_wrapper arvados-cwl-runner "$WORKSPACE/sdk/cwl"
   python_wrapper arvados_fuse "$WORKSPACE/services/fuse"
-  python_wrapper arvados-node-manager "$WORKSPACE/services/nodemanager"
 
   if [ $((${#failures[@]} - $GEM_BUILD_FAILURES)) -ne 0 ]; then
     PYTHON_BUILD_FAILURES=$((${#failures[@]} - $GEM_BUILD_FAILURES))
diff --git a/build/run-build-packages.sh b/build/run-build-packages.sh
index 5aa0b7e6f8e363642cf3aebfa6bff44d28926d2d..0e74ac6f2570761d34cfc91d58b36d16c1fa812d 100755 (executable)
@@ -102,18 +102,12 @@ if [[ "$DEBUG" != 0 ]]; then
     DASHQ_UNLESS_DEBUG=
 fi
 
-declare -a PYTHON_BACKPORTS PYTHON3_BACKPORTS
+declare -a PYTHON3_BACKPORTS
 
-PYTHON2_VERSION=2.7
 PYTHON3_VERSION=$(python3 -c 'import sys; print("{v.major}.{v.minor}".format(v=sys.version_info))')
 
 ## These defaults are suitable for any Debian-based distribution.
 # You can customize them as needed in distro sections below.
-PYTHON2_PACKAGE=python$PYTHON2_VERSION
-PYTHON2_PKG_PREFIX=python
-PYTHON2_PREFIX=/usr
-PYTHON2_INSTALL_LIB=lib/python$PYTHON2_VERSION/dist-packages
-
 PYTHON3_PACKAGE=python$PYTHON3_VERSION
 PYTHON3_PKG_PREFIX=python3
 PYTHON3_PREFIX=/usr
@@ -129,9 +123,6 @@ case "$TARGET" in
         ;;
     centos*)
         FORMAT=rpm
-        PYTHON2_PACKAGE=$(rpm -qf "$(which python$PYTHON2_VERSION)" --queryformat '%{NAME}\n')
-        PYTHON2_PKG_PREFIX=$PYTHON2_PACKAGE
-        PYTHON2_INSTALL_LIB=lib/python$PYTHON2_VERSION/site-packages
         PYTHON3_PACKAGE=$(rpm -qf "$(which python$PYTHON3_VERSION)" --queryformat '%{NAME}\n')
         PYTHON3_PKG_PREFIX=$PYTHON3_PACKAGE
         PYTHON3_PREFIX=/opt/rh/rh-python36/root/usr
@@ -321,29 +312,17 @@ package_go_binary tools/keep-exercise keep-exercise \
 package_go_so lib/pam pam_arvados.so libpam-arvados-go \
     "Arvados PAM authentication module"
 
-# The Python SDK - Should be built first because it's needed by others
-fpm_build_virtualenv "arvados-python-client" "sdk/python"
-
 # The Python SDK - Python3 package
 fpm_build_virtualenv "arvados-python-client" "sdk/python" "python3"
 
-# Arvados cwl runner - Only supports Python3 now
+# Arvados cwl runner - Python3 package
 fpm_build_virtualenv "arvados-cwl-runner" "sdk/cwl" "python3"
 
-# The PAM module
-fpm_build_virtualenv "libpam-arvados" "sdk/pam"
-
-# The FUSE driver
-fpm_build_virtualenv "arvados-fuse" "services/fuse"
-
 # The FUSE driver - Python3 package
 fpm_build_virtualenv "arvados-fuse" "services/fuse" "python3"
 
-# The node manager
-fpm_build_virtualenv "arvados-node-manager" "services/nodemanager"
-
 # The Arvados crunchstat-summary tool
-fpm_build_virtualenv "crunchstat-summary" "tools/crunchstat-summary"
+fpm_build_virtualenv "crunchstat-summary" "tools/crunchstat-summary" "python3"
 
 # The Docker image cleaner
 fpm_build_virtualenv "arvados-docker-cleaner" "services/dockercleaner" "python3"
@@ -354,11 +333,9 @@ if [[ -e "$WORKSPACE/cwltest" ]]; then
        rm -rf "$WORKSPACE/cwltest"
 fi
 git clone https://github.com/common-workflow-language/cwltest.git
-# last release to support python 2.7
-(cd cwltest && git checkout 1.0.20190906212748)
 # signal to our build script that we want a cwltest executable installed in /usr/bin/
 mkdir cwltest/bin && touch cwltest/bin/cwltest
-fpm_build_virtualenv "cwltest" "cwltest"
+fpm_build_virtualenv "cwltest" "cwltest" "python3"
 rm -rf "$WORKSPACE/cwltest"
 
 calculate_go_package_version arvados_server_version cmd/arvados-server
diff --git a/build/run-library.sh b/build/run-library.sh
index 3e6c9f85841d55be0e7d9794c4e86a693e5500c3..528d69d9982eac69e561a3ab7078488a94093d61 100755 (executable)
@@ -231,10 +231,6 @@ default_iteration() {
            [[ ${BASH_REMATCH[1]} -le $LICENSE_PACKAGE_TS ]]; then
         iteration=2
     fi
-    if [[ $package_type =~ ^python ]]; then
-      # Fix --iteration for #9242.
-      iteration=2
-    fi
     echo $iteration
 }
 
@@ -487,18 +483,9 @@ fpm_build_virtualenv () {
         fi
         PACKAGE_PREFIX=$PYTHON3_PKG_PREFIX
         ;;
-    python)
-        # All Arvados Python2 packages depend on Python 2.7.
-        # Make sure we build with that for consistency.
-        python=python2.7
-        pip=pip
-        PACKAGE_PREFIX=$PYTHON2_PKG_PREFIX
-        ;;
   esac
 
-  if [[ "$PKG" != "libpam-arvados" ]] &&
-     [[ "$PKG" != "arvados-node-manager" ]] &&
-     [[ "$PKG" != "arvados-docker-cleaner" ]]; then
+  if [[ "$PKG" != "arvados-docker-cleaner" ]]; then
     PYTHON_PKG=$PACKAGE_PREFIX-$PKG
   else
     # Exception to our package naming convention
@@ -651,25 +638,6 @@ fpm_build_virtualenv () {
   LICENSE_STRING=`grep license $WORKSPACE/$PKG_DIR/setup.py|cut -f2 -d=|sed -e "s/[',\\"]//g"`
   COMMAND_ARR+=('--license' "$LICENSE_STRING")
 
-  # 12271 - As FPM-generated packages don't include scripts by default, the
-  # packages cleanup on upgrade depends on files being listed on the %files
-  # section in the generated SPEC files. To remove DIRECTORIES, they need to
-  # be listed in that section too, so we need to add this parameter to properly
-  # remove lingering dirs. But this only works for python2: if used on
-  # python33, it includes dirs like /opt/rh/python33 that belong to
-  # other packages.
-  if [[ "$FORMAT" == "rpm" ]] && [[ "$python" == "python2.7" ]]; then
-    COMMAND_ARR+=('--rpm-auto-add-directories')
-  fi
-
-  if [[ "$PKG" == "arvados-python-client" ]] || [[ "$PKG" == "arvados-fuse" ]]; then
-    if [[ "$python" == "python2.7" ]]; then
-      COMMAND_ARR+=('--conflicts' "$PYTHON3_PKG_PREFIX-$PKG")
-    else
-      COMMAND_ARR+=('--conflicts' "$PYTHON2_PKG_PREFIX-$PKG")
-    fi
-  fi
-
   if [[ "$DEBUG" != "0" ]]; then
     COMMAND_ARR+=('--verbose' '--log' 'info')
   fi
@@ -685,11 +653,7 @@ fpm_build_virtualenv () {
     COMMAND_ARR+=('--before-remove' "${WORKSPACE}/build/go-python-package-scripts/prerm")
   fi
 
-  if [[ "$python" == "python2.7" ]]; then
-    COMMAND_ARR+=('--depends' "$PYTHON2_PACKAGE")
-  else
-    COMMAND_ARR+=('--depends' "$PYTHON3_PACKAGE")
-  fi
+  COMMAND_ARR+=('--depends' "$PYTHON3_PACKAGE")
 
   # avoid warning
   COMMAND_ARR+=('--deb-no-default-config-files')
@@ -714,7 +678,7 @@ fpm_build_virtualenv () {
   done
 
   # make sure the systemd service file ends up in the right place
-  # used by arvados-docker-cleaner and arvados-node-manager
+  # used by arvados-docker-cleaner
   if [[ -e "${systemd_unit}" ]]; then
     COMMAND_ARR+=("usr/share/$python/dist/$PKG/share/doc/$PKG/$PKG.service=/lib/systemd/system/$PKG.service")
   fi
@@ -733,15 +697,6 @@ fpm_build_virtualenv () {
     done
   fi
 
-  # the libpam module should place a few files in the correct place for the pam
-  # subsystem
-  if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/$python/dist/$PYTHON_PKG/lib/security/libpam_arvados.py" ]]; then
-    COMMAND_ARR+=("usr/share/$python/dist/$PYTHON_PKG/lib/security/libpam_arvados.py=/usr/lib/security/")
-  fi
-  if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/$python/dist/$PYTHON_PKG/share/pam-configs/arvados" ]]; then
-    COMMAND_ARR+=("usr/share/$python/dist/$PYTHON_PKG/share/pam-configs/arvados=/usr/share/pam-configs/")
-  fi
-
   # the python-arvados-cwl-runner package comes with cwltool, expose that version
   if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/python2.7/dist/python-arvados-cwl-runner/bin/cwltool" ]]; then
     COMMAND_ARR+=("usr/share/python2.7/dist/python-arvados-cwl-runner/bin/cwltool=/usr/bin/")
@@ -802,17 +757,6 @@ fpm_build () {
       COMMAND_ARR+=(--deb-ignore-iteration-in-dependencies)
   fi
 
-  # 12271 - As FPM-generated packages don't include scripts by default, the
-  # packages cleanup on upgrade depends on files being listed on the %files
-  # section in the generated SPEC files. To remove DIRECTORIES, they need to
-  # be listed in that section too, so we need to add this parameter to properly
-  # remove lingering dirs. But this only works for python2: if used on
-  # python33, it includes dirs like /opt/rh/python33 that belong to
-  # other packages.
-  if [[ "$FORMAT" = rpm ]] && [[ "$python" = python2.7 ]]; then
-    COMMAND_ARR+=('--rpm-auto-add-directories')
-  fi
-
   if [[ "$DEBUG" != "0" ]]; then
     COMMAND_ARR+=('--verbose' '--log' 'info')
   fi
index 8ba97a55356dc51469295f5920173f50aabfe82d..6c697a657ba10c829e86db0f0f030e12fd76eb0c 100755 (executable)
@@ -91,6 +91,7 @@ lib/dispatchcloud/scheduler
 lib/dispatchcloud/ssh_executor
 lib/dispatchcloud/worker
 lib/mount
+lib/pam
 lib/service
 services/api
 services/arv-git-httpd
@@ -104,14 +105,10 @@ services/keepproxy
 services/keepstore
 services/keep-balance
 services/login-sync
-services/nodemanager
-services/nodemanager_integration
 services/crunch-dispatch-local
 services/crunch-dispatch-slurm
 services/ws
 sdk/cli
-sdk/pam
-sdk/pam:py3
 sdk/python
 sdk/python:py3
 sdk/ruby
@@ -198,7 +195,7 @@ sanity_checks() {
     ( [[ -n "$WORKSPACE" ]] && [[ -d "$WORKSPACE/services" ]] ) \
         || fatal "WORKSPACE environment variable not set to a source directory (see: $0 --help)"
     [[ -z "$CONFIGSRC" ]] || [[ -s "$CONFIGSRC/config.yml" ]] \
-       || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
+        || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
     echo Checking dependencies:
     echo "locale: ${LANG}"
     [[ "$(locale charmap)" = "UTF-8" ]] \
@@ -262,7 +259,7 @@ sanity_checks() {
         || fatal "No libpq libpq-fe.h. Try: apt-get install libpq-dev"
     echo -n 'libpam pam_appl.h: '
     find /usr/include -path '*/security/pam_appl.h' | egrep --max-count=1 . \
-        || fatal "No libpam pam_appl.h. Try: apt-get install libpam-dev"
+        || fatal "No libpam pam_appl.h. Try: apt-get install libpam0g-dev"
     echo -n 'postgresql: '
     psql --version || fatal "No postgresql. Try: apt-get install postgresql postgresql-client-common"
     echo -n 'phantomjs: '
@@ -306,8 +303,6 @@ declare -A skip
 declare -A only
 declare -A testargs
 skip[apps/workbench_profile]=1
-# nodemanager_integration tests are not reliable, see #12061.
-skip[services/nodemanager_integration]=1
 
 while [[ -n "$1" ]]
 do
@@ -378,7 +373,7 @@ if [[ ${skip["sdk/R"]} == 1 && ${skip["doc"]} == 1 ]]; then
 fi
 
 if [[ $NEED_SDK_R == false ]]; then
-       echo "R SDK not needed, it will not be installed."
+        echo "R SDK not needed, it will not be installed."
 fi
 
 checkpidfile() {
@@ -419,11 +414,11 @@ start_services() {
     . "$VENVDIR/bin/activate"
     echo 'Starting API, controller, keepproxy, keep-web, arv-git-httpd, ws, and nginx ssl proxy...'
     if [[ ! -d "$WORKSPACE/services/api/log" ]]; then
-       mkdir -p "$WORKSPACE/services/api/log"
+        mkdir -p "$WORKSPACE/services/api/log"
     fi
     # Remove empty api.pid file if it exists
     if [[ -f "$WORKSPACE/tmp/api.pid" && ! -s "$WORKSPACE/tmp/api.pid" ]]; then
-       rm -f "$WORKSPACE/tmp/api.pid"
+        rm -f "$WORKSPACE/tmp/api.pid"
     fi
     all_services_stopped=
     fail=1
@@ -668,14 +663,6 @@ install_env() {
         python setup.py install
     ) || fatal "installing PyYAML and sdk/python failed"
 
-    # Preinstall libcloud if using a fork; otherwise nodemanager "pip
-    # install" won't pick it up by default.
-    if [[ -n "$LIBCLOUD_PIN_SRC" ]]; then
-        pip freeze 2>/dev/null | egrep ^apache-libcloud==$LIBCLOUD_PIN \
-            || pip install --pre --ignore-installed --no-cache-dir "$LIBCLOUD_PIN_SRC" >/dev/null \
-            || fatal "pip install apache-libcloud failed"
-    fi
-
     # Deactivate Python 2 virtualenv
     deactivate
 
@@ -722,9 +709,6 @@ do_test() {
         apps/workbench_units | apps/workbench_functionals | apps/workbench_integration)
             suite=apps/workbench
             ;;
-        services/nodemanager | services/nodemanager_integration)
-            suite=services/nodemanager_suite
-            ;;
         *)
             suite="${1}"
             ;;
@@ -833,19 +817,19 @@ do_test_once() {
 
 check_arvados_config() {
     if [[ "$1" = "env" ]] ; then
-       return
+        return
     fi
     if [[ -z "$ARVADOS_CONFIG" ]] ; then
-       # Create config file.  The run_test_server script requires PyYAML,
-       # so virtualenv needs to be active.  Downstream steps like
-       # workbench install which require a valid config.yml.
-       if [[ ! -s "$VENVDIR/bin/activate" ]] ; then
-           install_env
-       fi
-       . "$VENVDIR/bin/activate"
+        # Create the config file.  The run_test_server script requires PyYAML,
+        # so the virtualenv needs to be active.  Downstream steps, like the
+        # workbench install, require a valid config.yml.
+        if [[ ! -s "$VENVDIR/bin/activate" ]] ; then
+            install_env
+        fi
+        . "$VENVDIR/bin/activate"
         cd "$WORKSPACE"
-       eval $(python sdk/python/tests/run_test_server.py setup_config)
-       deactivate
+        eval $(python sdk/python/tests/run_test_server.py setup_config)
+        deactivate
     fi
 }
 
@@ -1004,14 +988,12 @@ install_services/api() {
 
 declare -a pythonstuff
 pythonstuff=(
-    sdk/pam
     sdk/python
     sdk/python:py3
     sdk/cwl:py3
     services/dockercleaner:py3
     services/fuse
     services/fuse:py3
-    services/nodemanager
     tools/crunchstat-summary
     tools/crunchstat-summary:py3
 )
@@ -1076,11 +1058,6 @@ test_services/login-sync() {
         && "$bundle" exec rake test TESTOPTS=-v ${testargs[services/login-sync]}
 }
 
-test_services/nodemanager_integration() {
-    cd "$WORKSPACE/services/nodemanager" \
-        && tests/integration_test.py ${testargs[services/nodemanager_integration]}
-}
-
 test_apps/workbench_units() {
     local TASK="test:units"
     cd "$WORKSPACE/apps/workbench" \
@@ -1175,7 +1152,6 @@ test_all() {
     do_test sdk/cli
     do_test services/login-sync
     do_test sdk/java-v2
-    do_test services/nodemanager_integration
     for p in "${pythonstuff[@]}"
     do
         dir=${p%:py3}
index 20b9139986d38bfff620452945ae87b9ae7affa9..968ca51fa92dc5a840ed247ae6626a11c676e240 100644 (file)
@@ -162,6 +162,7 @@ navbar:
       - admin/migrating-providers.html.textile.liquid
       - user/topics/arvados-sync-groups.html.textile.liquid
       - admin/scoped-tokens.html.textile.liquid
+      - admin/token-expiration-policy.html.textile.liquid
     - Monitoring:
       - admin/logging.html.textile.liquid
       - admin/metrics.html.textile.liquid
index 881227b3fa9a84ce084f107de771aa862c1949c5..abdd8db734e7522f61acbcfbf0610ace401d38fe 100644 (file)
@@ -16,24 +16,6 @@ Services must have ManagementToken configured.  This is used to authorize access
 
 To access a monitoring endpoint, the requester must provide the HTTP header @Authorization: Bearer (ManagementToken)@.
 
-h2. Node Manager
-
-Set @port@ (the listen port) and @ManagementToken@ in the @Manage@ section of @node-manager.ini@.
-
-<pre>
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-ManagementToken = xxx
-</pre>
-
 h2. API server and other services
 
 The following services also support monitoring.
@@ -45,7 +27,7 @@ The following services also support monitoring.
 * keepproxy
 * keepstore
 * keep-web
-* websockets
+* arvados-ws
 
 Set @ManagementToken@ in the appropriate section of @/etc/arvados/config.yml@.
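+
+For example, a minimal sketch: the cluster ID @zzzzz@, the token value, and the host name below are placeholders.  Set the token in @config.yml@, then query a service's monitoring endpoint (here @keep-web@):
+
+<pre>
+Clusters:
+  zzzzz:
+    ManagementToken: xxxxxxxxxxxxxxxxxxxx
+</pre>
+
+<pre>
+curl -sfH "Authorization: Bearer xxxxxxxxxxxxxxxxxxxx" "https://keep.zzzzz.example.com/metrics"
+</pre>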
 
index 1d6b87da62116027a96788c8fe7b73c44a269133..0cfa0a2e604cc0ee40bcbe3cc1a44836b3247b72 100644 (file)
@@ -35,7 +35,6 @@ table(table table-bordered table-condensed table-hover).
 |arvados-controller|✓|
 |arvados-dispatch-cloud|✓|
 |arvados-git-httpd||
-|arvados-node-manager||
 |arvados-ws|✓|
 |composer||
 |keepproxy||
@@ -44,48 +43,3 @@ table(table table-bordered table-condensed table-hover).
 |keep-web|✓|
 |workbench1||
 |workbench2||
-
-h2. Node manager
-
-The node manager does not export prometheus-style metrics, but its @/status.json@ endpoint provides a snapshot of internal status at the time of the most recent wishlist update.
-
-<pre>curl -sfH "Authorization: Bearer your_management_token_goes_here" "http://0.0.0.0:8989/status.json"
-</pre>
-
-table(table table-bordered table-condensed).
-|_. Attribute|_. Type|_. Description|
-|nodes_booting|int|Number of nodes in booting state|
-|nodes_unpaired|int|Number of nodes in unpaired state|
-|nodes_busy|int|Number of nodes in busy state|
-|nodes_idle|int|Number of nodes in idle state|
-|nodes_fail|int|Number of nodes in fail state|
-|nodes_down|int|Number of nodes in down state|
-|nodes_shutdown|int|Number of nodes in shutdown state|
-|nodes_wish|int|Number of nodes in the current wishlist|
-|node_quota|int|Current node count ceiling due to cloud quota limits|
-|config_max_nodes|int|Configured max node count|
-
-h3. Example
-
-<pre>
-{
-  "actor_exceptions": 0,
-  "idle_times": {
-    "compute1": 0,
-    "compute3": 0,
-    "compute2": 0,
-    "compute4": 0
-  },
-  "create_node_errors": 0,
-  "destroy_node_errors": 0,
-  "nodes_idle": 0,
-  "config_max_nodes": 8,
-  "list_nodes_errors": 0,
-  "node_quota": 8,
-  "Version": "1.1.4.20180719160944",
-  "nodes_wish": 0,
-  "nodes_unpaired": 0,
-  "nodes_busy": 4,
-  "boot_failures": 0
-}
-</pre>
index 5bad5f25b3edff7e6ae7321e3742e8d4039ff149..18578a78d683cb02d58c836b03b36362fbabc4bf 100644 (file)
@@ -4,6 +4,12 @@ navsection: admin
 title: Securing API access with scoped tokens
 ...
 
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
 By default, Arvados API tokens grant unlimited access to a user account, and admin account tokens have unlimited access to the whole system.  If you want to grant restricted access to a user account, you can create a "scoped token" which is an Arvados API token which is limited to accessing specific APIs.
 
 One use of token scopes is to grant access to data, such as a collection, to users who do not have an Arvados account on your cluster.  This is done by creating a scoped token that only allows getting a specific record.  An example of this is "creating a collection sharing link.":{{site.baseurl}}/sdk/python/cookbook.html#sharing_link
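+
+As a rough sketch (the collection UUID is a placeholder; the scope string must match the exact API method and path being granted), such a read-only token could be created with the CLI:
+
+<notextile>
+<pre><code>$ <span class="userinput">arv api_client_authorization create --api-client-authorization '{"scopes": ["GET /arvados/v1/collections/zzzzz-4zz18-xxxxxxxxxxxxxxx"]}'</span>
+</code></pre>
+</notextile>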
index bc0600e22333e6e8bc6f45a927797128253a6b29..7f49d6961292f7371436cb04cbe3892a1a0efadb 100644 (file)
@@ -25,14 +25,14 @@ Clusters:
       UsePreemptibleInstances: true
     InstanceTypes:
       m4.large:
-       Preemptible: false
+        Preemptible: false
         ProviderType: m4.large
         VCPUs: 2
         RAM: 8GiB
         AddedScratch: 32GB
         Price: 0.1
       m4.large.spot:
-       Preemptible: true
+        Preemptible: true
         ProviderType: m4.large
         VCPUs: 2
         RAM: 8GiB
@@ -44,8 +44,6 @@ When @UsePreemptibleInstances@ is enabled, child containers (workflow steps) wil
 
 If you are using "arvados-dispatch-cloud":{{site.baseurl}}/install/crunch2-cloud/install-dispatch-cloud.html no additional configuration is required.
 
-If you are using the legacy Nodemanager, "see below":#nodemanager .
-
 h2. Preemptible instances on AWS
 
 For general information, see "using Amazon EC2 spot instances":https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-spot-instances.html .
@@ -62,22 +60,7 @@ The account needs to have a service linked role created. This can be done by log
 
 h3. Cost Tracking
 
-Amazon's Spot instances prices are declared at instance request time and defined by the maximum price that the user is willing to pay per hour. By default, this price is the same amount as the on-demand version of each instance type, and this setting is the one that nodemanager uses for now, as it doesn't include any pricing data to the spot instance request.
+Amazon's Spot instance prices are declared at instance request time and capped by the maximum price the user is willing to pay per hour. By default, this maximum is the same as the on-demand price of each instance type, and this is the setting @arvados-dispatch-cloud@ currently uses, since it does not attach any pricing data to the spot instance request.
 
 The real price that a spot instance has at any point in time is discovered at the end of each usage hour, depending on instance demand. For this reason, AWS provides a data feed subscription to get hourly logs, as described on "Amazon's User Guide":https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-data-feeds.html.
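+
+As an illustrative sketch (the bucket name and prefix are placeholders), the data feed subscription can be created with the AWS CLI:
+
+<pre>
+aws ec2 create-spot-datafeed-subscription --bucket my-billing-bucket --prefix spot-data-feed/
+</pre>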
 
-h2(#nodemanager). Nodemanager
-
-If you are using the legacy Nodemanager, its config file must also declare preemptible instance sizes, which must match the API server's @InstanceTypes@:
-
-<pre>
-[Size m4.large]
-cores = 2
-scratch = 32000
-
-[Size m4.large.spot]
-cores = 2
-instance_type = m4.large
-preemptible = true
-scratch = 32000
-</pre>
diff --git a/doc/admin/token-expiration-policy.html.textile.liquid b/doc/admin/token-expiration-policy.html.textile.liquid
new file mode 100644 (file)
index 0000000..f5ee61b
--- /dev/null
@@ -0,0 +1,62 @@
+---
+layout: default
+navsection: admin
+title: Setting token expiration policy
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+When a user logs in to Workbench, they receive a newly created token that grants access to the Arvados API on behalf of that user.  By default, this token does not expire until the user explicitly logs off.
+
+Security policies, such as those mandated by GxP compliance, may require tokens to expire by default in order to limit the risk associated with a leaked token.
+
+The @Login.TokenLifetime@ configuration enables the administrator to set an expiration lifetime for tokens granted through the login flow.
+
+h2. Setting token expiration
+
+Suppose that the organization's security policy requires that user sessions not be valid for more than 12 hours. In that case, the cluster configuration should be set as follows:
+
+<pre>
+Clusters:
+  zzzzz:
+    ...
+    Login:
+      TokenLifetime: 12h
+    ...
+</pre>
+
+With this configuration, users will have to log in again every 12 hours.
+
+When this configuration is active, the Workbench client will also be "untrusted" by default.  This means tokens issued to Workbench cannot be used to list other tokens issued to the user, and cannot be used to grant new tokens.  This stops an attacker from leveraging a leaked token to acquire other tokens.
+
+The default @TokenLifetime@ is zero, which disables this feature.
+
+h2. Applying policy to existing tokens
+
+If you have an existing Arvados installation and want to set a token lifetime policy, there may be user tokens already granted.  The administrator can use the following @rake@ tasks to enforce the new policy.
+
+The @db:check_long_lived_tokens@ task will list which users have tokens with no expiration date.
+
+<notextile>
+<pre><code># <span class="userinput">bundle exec rake db:check_long_lived_tokens</span>
+Found 6 long-lived tokens from users:
+user2,user2@example.com,zzzzz-tpzed-5vzt5wc62k46p6r
+admin,admin@example.com,zzzzz-tpzed-6drplgwq9nm5cox
+user1,user1@example.com,zzzzz-tpzed-ftz2tfurbpf7xox
+</code></pre>
+</notextile>
+
+To apply the new policy to existing tokens, use the @db:fix_long_lived_tokens@ task.
+
+<notextile>
+<pre><code># <span class="userinput">bundle exec rake db:fix_long_lived_tokens</span>
+Setting token expiration to: 2020-08-25 03:30:50 +0000
+6 tokens updated.
+</code></pre>
+</notextile>
+
+NOTE: These rake tasks adjust the expiration of all tokens except those belonging to the system root user (@zzzzz-tpzed-000000000000000@).  If you have tokens used by automated service accounts that need to be long-lived, you can "create tokens that don't expire using the command line":user-management-cli.html#create-token .
index 84ef780faa09354037151f054fdb15daf9ba937f..061b68fa5d27b766e7d45bd0c08750fed210f5dd 100644 (file)
@@ -38,6 +38,10 @@ h2(#master). development master (as of 2020-06-17)
 
 "Upgrading from 2.0.0":#v2_0_0
 
+h3. Removing libpam-arvados, replaced with libpam-arvados-go
+
+The Python-based PAM package has been replaced with a version written in Go. See "using PAM for authentication":{{site.baseurl}}/install/setup-login.html#pam for details.
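+
+On Debian-based systems, switching packages looks like this (a sketch; package names as of this release, adjust for your distribution):
+
+<pre>
+sudo apt-get remove libpam-arvados
+sudo apt-get install libpam-arvados-go
+</pre>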
+
 h3. Removing sso-provider
 
 The SSO (single sign-on) component is deprecated and will not be supported in future releases. Existing configurations will continue to work in this release, but you should switch to one of the built-in authentication mechanisms as soon as possible. See "setting up web based login":{{site.baseurl}}/install/setup-login.html for details.
@@ -569,7 +573,7 @@ As part of story "#11349":https://dev.arvados.org/issues/11349, commit "2c094e2"
 
 * To enable it, add to your configuration file: <pre>[Manage]
   address = 127.0.0.1
-  port = 8989</pre> (see example configuration files in source:services/nodemanager/doc or https://doc.arvados.org/install/install-nodemanager.html for more info)
+  port = 8989</pre>
 * The server responds to @http://{address}:{port}/status.json@ with a summary of how many nodes are in each state (booting, busy, shutdown, etc.)
 
 h3. New websockets component (2017-03-23)
index 33969ea8f85c6b8786e3f88e12f4a88270ccef3a..6892176604a91346fc64eea037820f0c1262e3e6 100644 (file)
@@ -16,7 +16,7 @@ ARVADOS_API_HOST={{ site.arvados_api_host }}
 ARVADOS_API_TOKEN=1234567890qwertyuiopasdfghjklzxcvbnm1234567890zzzz
 </pre>
 
-In these examples, @x1u39-tpzed-3kz0nwtjehhl0u4@ is the sample user account.  Replace with the uuid of the user you wish to manipulate.
+In these examples, @zzzzz-tpzed-3kz0nwtjehhl0u4@ is the sample user account.  Replace with the uuid of the user you wish to manipulate.
 
 See "user management":{{site.baseurl}}/admin/activation.html for an overview of how to use these commands.
 
@@ -24,28 +24,68 @@ h3. Setup a user
 
 This creates a default git repository and VM login, and enables the user to self-activate using Workbench.
 
-<pre>
-arv user setup --uuid x1u39-tpzed-3kz0nwtjehhl0u4
-</pre>
+<notextile>
+<pre><code>$ <span class="userinput">arv user setup --uuid zzzzz-tpzed-3kz0nwtjehhl0u4</span>
+</code></pre>
+</notextile>
+
 
 h3. Deactivate user
 
-<pre>
-arv user unsetup --uuid x1u39-tpzed-3kz0nwtjehhl0u4
-</pre>
+<notextile>
+<pre><code>$ <span class="userinput">arv user unsetup --uuid zzzzz-tpzed-3kz0nwtjehhl0u4</span>
+</code></pre>
+</notextile>
+
 
 When deactivating a user, you may also want to "reassign ownership of their data":{{site.baseurl}}/admin/reassign-ownership.html .
 
 h3. Directly activate user
 
-<pre>
-arv user update --uuid "x1u39-tpzed-3kz0nwtjehhl0u4" --user '{"is_active":true}'
-</pre>
+<notextile>
+<pre><code>$ <span class="userinput">arv user update --uuid "zzzzz-tpzed-3kz0nwtjehhl0u4" --user '{"is_active":true}'</span>
+</code></pre>
+</notextile>
+
+Note: this bypasses user agreement checks, and does not set up the user with a default git repository or VM login.
 
-Note this bypasses user agreements checks, and does not set up the user with a default git repository or VM login.
+h3(#create-token). Create a token for a user
 
+As an admin, you can create tokens for other users.
+
+<notextile>
+<pre><code>$ <span class="userinput">arv api_client_authorization create --api-client-authorization '{"owner_uuid": "zzzzz-tpzed-fr97h9t4m5jffxs"}'</span>
+{
+ "href":"/api_client_authorizations/zzzzz-gj3su-yyyyyyyyyyyyyyy",
+ "kind":"arvados#apiClientAuthorization",
+ "etag":"9yk144t0v6cvyp0342exoh2vq",
+ "uuid":"zzzzz-gj3su-yyyyyyyyyyyyyyy",
+ "owner_uuid":"zzzzz-tpzed-fr97h9t4m5jffxs",
+ "created_at":"2020-03-12T20:36:12.517375422Z",
+ "modified_by_client_uuid":null,
+ "modified_by_user_uuid":null,
+ "modified_at":null,
+ "user_id":3,
+ "api_client_id":7,
+ "api_token":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+ "created_by_ip_address":null,
+ "default_owner_uuid":null,
+ "expires_at":null,
+ "last_used_at":null,
+ "last_used_by_ip_address":null,
+ "scopes":["all"]
+}
+</code></pre>
+</notextile>
+
+
+To get the token string, combine the values of @uuid@ and @api_token@ in the form "v2/$uuid/$api_token".  In this example, the string that goes in @ARVADOS_API_TOKEN@ would be:
+
+<pre>
+ARVADOS_API_TOKEN=v2/zzzzz-gj3su-yyyyyyyyyyyyyyy/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+</pre>
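+
+As a quick sanity check (a sketch; uses the placeholder values above, with @ARVADOS_API_HOST@ already exported as shown at the top of this page):
+
+<notextile>
+<pre><code>$ <span class="userinput">ARVADOS_API_TOKEN=v2/zzzzz-gj3su-yyyyyyyyyyyyyyy/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx arv user current</span>
+</code></pre>
+</notextile>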
 
-h2. Permissions
+h2. Adding Permissions
 
 h3. VM login
 
index 705048cd620cf566ad5ece5722e311262642d623..dddcd050731eaf712d1485458ba25791262a5615 100644 (file)
@@ -20,6 +20,7 @@ table(table table-bordered table-condensed).
 |_. Component|_. Description|
 |api|The API server is the core of Arvados.  It is backed by a Postgres database and manages information such as metadata for storage, a record of submitted compute jobs, users, groups, and associated permissions.|
 |arv-git-httpd|Provides a git+http interface to Arvados-managed git repositories, with permissions and authentication based on an Arvados API token.|
+|arvados-dispatch-cloud|Provides elastic computing by creating and destroying cloud-based virtual machines in response to compute demand.|
 |crunch-dispatch-local|Get compute requests submitted to the API server and execute them locally.|
 |crunch-dispatch-slurm|Get compute requests submitted to the API server and submit them to slurm.|
 |crunch-run|Dispatched by crunch-dispatch, executes a single compute run: setting up a Docker container, running it, and collecting the output.|
@@ -31,8 +32,7 @@ table(table table-bordered table-condensed).
 |keepstore|Provides access to underlying storage (filesystem or object storage such as Amazon S3 or Azure Blob) with Arvados permissions.|
 |keep-web|Provides high-level WebDAV access to collections (file-level data access).|
 |login-sync|Synchronize virtual machine users with Arvados users and permissions.|
-|nodemanager|Provide elastic computing by creating and destroying cloud based virtual machines on compute demand.|
-|ws|Publishes API server change events over websockets.|
+|arvados-ws|Publishes API server change events over websockets.|
 |workbench|Web application providing user interface to Arvados services.|
 
 h3. Tools
index 23da428b395a994729ab02e5dcacb6cb1e3f3d2f..cdecc88152e38f1e34e2a1ebdbc26e6271174a96 100644 (file)
@@ -92,8 +92,6 @@ Options:
       Azure secrets file which will be sourced from this script
   --azure-resource-group (default: false, required if building for Azure)
       Azure resource group
-  --azure-storage-account (default: false, required if building for Azure)
-      Azure storage account
   --azure-location (default: false, required if building for Azure)
       Azure location, e.g. centralus, eastus, westeurope
   --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
@@ -117,7 +115,6 @@ h2(#azure). Build an Azure image
 <notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
            --arvados-cluster-id ClusterID \
            --azure-resource-group ResourceGroup \
-           --azure-storage-account StorageAccount \
            --azure-location AzureRegion \
            --azure-sku AzureSKU \
            --azure-secrets-file AzureSecretsFilePath \
@@ -126,7 +123,7 @@ h2(#azure). Build an Azure image
 </span>
 </code></pre></notextile>
 
-For @ClusterID@, fill in your cluster ID. The @ResourceGroup@, @StorageAccount@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
+For @ClusterID@, fill in your cluster ID. The @ResourceGroup@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
 
 @AzureSecretsFilePath@ should be replaced with the path to a shell script that loads the Azure secrets with sufficient permissions to create the image. The file would look like this:
 
index faa7c5b953fcf6febf3b32080914c392d27a5a7e..68417784701ce387e7437bb0f0b8e62a2335e5ff 100644 (file)
@@ -93,6 +93,77 @@ h4. Minimal configuration example for Amazon EC2
 
 h4. Minimal configuration example for Azure
 
+Using managed disks:
+
+<notextile>
+<pre><code>    Containers:
+      CloudVMs:
+        ImageID: "zzzzz-compute-v1597349873"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored, only needs to
+          # be specified if it is different from ResourceGroup
+          ImageResourceGroup: aaaaa
+
+</code></pre>
+</notextile>
+
+Azure recommends using managed images. If you plan to start more than 20 VMs simultaneously, however, it recommends using a shared image gallery instead, to avoid slowdowns and timeouts while the VMs are created.
+
+Using an image from a shared image gallery:
+
+<notextile>
+<pre><code>    Containers:
+      CloudVMs:
+        ImageID: "shared_image_gallery_image_definition_name"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored, only needs to
+          # be specified if it is different from ResourceGroup
+          ImageResourceGroup: aaaaa
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: "shared_image_gallery_1"
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: "0.0.1"
+
+</code></pre>
+</notextile>
+
+Using unmanaged disks (deprecated):
+
 <notextile>
 <pre><code>    Containers:
       CloudVMs:
index a94de2a601b663047869d2a1bc84869b008b3214..55095b1f20f05cb21e203a9ba6a39fa3f069a2dd 100644 (file)
@@ -60,8 +60,8 @@ table(table table-bordered table-condensed).
 |"Shell server":install-shell-server.html |Synchronize (create/delete/configure) Unix shell accounts with Arvados users.|Optional.|
 |"Git server":install-arv-git-httpd.html |Arvados-hosted git repositories, with Arvados-token based authentication.|Optional, but required by Workflow Composer.|
 |\3=. *Crunch (running containers)*|
-|"crunch-dispatch-slurm":crunch2-slurm/install-prerequisites.html |Run analysis workflows using Docker containers distributed across a Slurm cluster.|Optional if you wish to use Arvados for data management only.|
-|"Node Manager":install-nodemanager.html, "arvados-dispatch-cloud":crunch2-cloud/install-dispatch-cloud.html |Allocate and free cloud VM instances on demand based on workload.|Optional, not needed for a static Slurm cluster (such as on-premises HPC).|
+|"arvados-dispatch-cloud":crunch2-cloud/install-dispatch-cloud.html |Allocate and free cloud VM instances on demand based on workload.|Optional, not needed for a static Slurm cluster such as on-premises HPC.|
+|"crunch-dispatch-slurm":crunch2-slurm/install-prerequisites.html |Run analysis workflows using Docker containers distributed across a Slurm cluster.|Optional, not needed for a Cloud installation, or if you wish to use Arvados for data management only.|
 
 h2(#identity). Identity provider
 
diff --git a/doc/install/install-nodemanager.html.textile.liquid b/doc/install/install-nodemanager.html.textile.liquid
deleted file mode 100644 (file)
index 75e4b25..0000000
+++ /dev/null
@@ -1,629 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Install Node Manager
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-Arvados Node Manager provides elastic computing for Arvados and Slurm by creating and destroying virtual machines on demand.  Node Manager currently supports Amazon Web Services (AWS), Google Cloud Platform (GCP) and Microsoft Azure.
-
-Note: node manager is only required for elastic computing cloud environments.  Fixed size clusters (such as on-premise HPC) do not require node manager.
-
-h2. Install
-
-Node manager may run anywhere, however it must be able to communicate with the cloud provider's APIs, and use the command line tools @sinfo@, @squeue@ and @scontrol@ to communicate with the cluster's Slurm controller.
-
-On Debian-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install arvados-node-manager</span>
-</code></pre>
-</notextile>
-
-On Red Hat-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo yum install arvados-node-manager</span>
-</code></pre>
-</notextile>
-
-h2. Create compute image
-
-Configure a virtual machine following the "instructions to set up a compute node.":{{site.baseurl}}/install/crunch2-slurm/install-compute-node.html and set it up to run a "ping script":{{site.baseurl}}/install/install-compute-ping.html at boot.
-
-Create a virtual machine image using the commands provided by your cloud provider.  We recommend using a tool such as "Packer":https://www.packer.io/ to automate this process.
-
-Configure node manager to use the image with the @image@ or @image_id@ parameter.
-
-h2. Configure node manager
-
-The configuration file at @/etc/arvados-node-manager/config.ini@ .  Some configuration details are specific to the cloud provider you are using:
-
-* "Amazon Web Services":#aws
-* "Google Cloud Platform":#gcp
-* "Microsoft Azure":#azure
-
-h3(#aws). Amazon Web Services
-
-<pre>
-# EC2 configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the Slurm dispatcher drains nodes
-# through Slurm before shutting them down.
-dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on rate of spending (in $/hr), will not boot additional nodes
-# if total price of already running nodes meets or exceeds this threshold.
-# default 0 means no limit.
-max_total_price = 0
-
-# Poll EC2 nodes and Arvados for new information every N seconds.
-poll_time = 60
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 300
-
-# If Node Manager can't succesfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 600
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 1800
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Number of consecutive times a node must report as "idle" before it
-# will be considered eligible for shutdown.  Node status is checked
-# each poll period, and node can go idle at any point during a poll
-# period (meaning a node could be reported as idle that has only been
-# idle for 1 second).  With a 60 second poll period, three consecutive
-# status updates of "idle" suggests the node has been idle at least
-# 121 seconds.
-consecutive_idle_count = 3
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-file = /var/log/arvados/node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = INFO
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = zyxwv.arvadosapi.com
-token = ARVADOS_TOKEN
-timeout = 15
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = no
-
-[Cloud]
-provider = ec2
-
-# It's usually most cost-effective to shut down compute nodes during narrow
-# windows of time.  For example, EC2 bills each node by the hour, so the best
-# time to shut down a node is right before a new hour of uptime starts.
-# Shutdown windows define these periods of time.  These are windows in
-# full minutes, separated by commas.  Counting from the time the node is
-# booted, the node WILL NOT shut down for N1 minutes; then it MAY shut down
-# for N2 minutes; then it WILL NOT shut down for N3 minutes; and so on.
-# For example, "54, 5, 1" means the node may shut down from the 54th to the
-# 59th minute of each hour of uptime.
-# Specify at least two windows.  You can add as many as you need beyond that.
-shutdown_windows = 54, 5, 1
-
-[Cloud Credentials]
-key = KEY
-secret = SECRET_KEY
-region = us-east-1
-timeout = 60
-
-[Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Amazon filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
-
-[Cloud Create]
-# New compute nodes will send pings to Arvados at this host.
-# You may specify a port, and use brackets to disambiguate IPv6 addresses.
-ping_host = hostname:port
-
-# Give the name of an SSH key on AWS...
-ex_keyname = string
-
-# ... or a file path for an SSH key that can log in to the compute node.
-# (One or the other, not both.)
-# ssh_key = path
-
-# The EC2 IDs of the image and subnet compute nodes should use.
-image_id = idstring
-subnet_id = idstring
-
-# Comma-separated EC2 IDs for the security group(s) assigned to each
-# compute node.
-security_groups = idstring1, idstring2
-
-# Apply an Instance Profile ARN to the newly created compute nodes
-# For more info, see:
-# https://aws.amazon.com/premiumsupport/knowledge-center/iam-policy-restrict-vpc/
-# ex_iamprofile = arn:aws:iam::ACCOUNTNUMBER:instance-profile/ROLENAME
-
-
-# You can define any number of Size sections to list EC2 sizes you're
-# willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue.
-#
-# Each size section MUST define the number of cores are available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.  You can also override Amazon's provided
-# data fields (such as price per hour) by setting them here.
-
-[Size m4.large]
-cores = 2
-price = 0.126
-scratch = 100
-
-[Size m4.xlarge]
-cores = 4
-price = 0.252
-scratch = 100
-</pre>
-
-h3(#gcp). Google Cloud Platform
-
-<pre>
-# Google Compute Engine configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# running at all times.  By default, these will be the cheapest node size.
-max_nodes = 8
-
-# Poll compute nodes and Arvados for new information every N seconds.
-poll_time = 60
-
-# Upper limit on rate of spending (in $/hr), will not boot additional nodes
-# if total price of already running nodes meets or exceeds this threshold.
-# default 0 means no limit.
-max_total_price = 0
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 300
-
-# If Node Manager can't succesfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 600
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Number of consecutive times a node must report as "idle" before it
-# will be considered eligible for shutdown.  Node status is checked
-# each poll period, and node can go idle at any point during a poll
-# period (meaning a node could be reported as idle that has only been
-# idle for 1 second).  With a 60 second poll period, three consecutive
-# status updates of "idle" suggests the node has been idle at least
-# 121 seconds.
-consecutive_idle_count = 3
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-file = /var/log/arvados/node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = INFO
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = zyxwv.arvadosapi.com
-token = ARVADOS_TOKEN
-timeout = 15
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = no
-
-[Cloud]
-provider = gce
-
-# Shutdown windows define periods of time when a node may and may not
-# be shut down.  These are windows in full minutes, separated by
-# commas.  Counting from the time the node is booted, the node WILL
-# NOT shut down for N1 minutes; then it MAY shut down for N2 minutes;
-# then it WILL NOT shut down for N3 minutes; and so on.  For example,
-# "54, 5, 1" means the node may shut down from the 54th to the 59th
-# minute of each hour of uptime.
-# GCE bills by the minute, and does not provide information about when
-# a node booted.  Node Manager will store this information in metadata
-# when it boots a node; if that information is not available, it will
-# assume the node booted at the epoch.  These shutdown settings are
-# very aggressive.  You may want to adjust this if you want more
-# continuity of service from a single node.
-shutdown_windows = 20, 999999
-
-[Cloud Credentials]
-user_id = client_email_address@developer.gserviceaccount.com
-key = path_to_certificate.pem
-project = project-id-from-google-cloud-dashboard
-timeout = 60
-
-# Valid location (zone) names: https://cloud.google.com/compute/docs/zones
-datacenter = us-central1-a
-
-# Optional settings. For full documentation see
-# http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
-#
-# auth_type = SA               # SA, IA or GCE
-# scopes = https://www.googleapis.com/auth/compute
-# credential_file =
-
-[Cloud List]
-# A comma-separated list of tags that must be applied to a node for it to
-# be considered a compute node.
-# The driver will automatically apply these tags to nodes it creates.
-tags = zyxwv, compute
-
-[Cloud Create]
-# New compute nodes will send pings to Arvados at this host.
-# You may specify a port, and use brackets to disambiguate IPv6 addresses.
-ping_host = hostname:port
-
-# A file path for an SSH key that can log in to the compute node.
-# ssh_key = path
-
-# The GCE image name and network zone name to use when creating new nodes.
-image = debian
-# network = your_network_name
-
-# JSON string of service account authorizations for this cluster.
-# See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
-# service_accounts = [{'email':'account@example.com', 'scopes':['storage-ro']}]
-
-
-# You can define any number of Size sections to list node sizes you're
-# willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue.
-#
-# The Size fields are interpreted the same way as with a libcloud NodeSize:
-# http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
-#
-# See https://cloud.google.com/compute/docs/machine-types for a list
-# of known machine types that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores are available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.
-# You can also override Google's provided data fields (such as price per hour)
-# by setting them here.
-
-[Size n1-standard-2]
-cores = 2
-price = 0.076
-scratch = 100
-
-[Size n1-standard-4]
-cores = 4
-price = 0.152
-scratch = 200
-</pre>
-
-h3(#azure). Microsoft Azure
-
-<pre>
-# Azure configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the Slurm dispatcher drains nodes
-# through Slurm before shutting them down.
-dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on rate of spending (in $/hr), will not boot additional nodes
-# if total price of already running nodes meets or exceeds this threshold.
-# default 0 means no limit.
-max_total_price = 0
-
-# Poll Azure nodes and Arvados for new information every N seconds.
-poll_time = 60
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 300
-
-# If Node Manager can't succesfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 600
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 1800
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Number of consecutive times a node must report as "idle" before it
-# will be considered eligible for shutdown.  Node status is checked
-# each poll period, and node can go idle at any point during a poll
-# period (meaning a node could be reported as idle that has only been
-# idle for 1 second).  With a 60 second poll period, three consecutive
-# status updates of "idle" suggests the node has been idle at least
-# 121 seconds.
-consecutive_idle_count = 3
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-file = /var/log/arvados/node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = INFO
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = zyxwv.arvadosapi.com
-token = ARVADOS_TOKEN
-timeout = 15
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = no
-
-[Cloud]
-provider = azure
-
-# Shutdown windows define periods of time when a node may and may not be shut
-# down.  These are windows in full minutes, separated by commas.  Counting from
-# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
-# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
-# and so on.  For example, "20, 999999" means the node may shut down between
-# the 20th and 999999th minutes of uptime.
-# Azure bills by the minute, so it makes sense to agressively shut down idle
-# nodes.  Specify at least two windows.  You can add as many as you need beyond
-# that.
-shutdown_windows = 20, 999999
-
-[Cloud Credentials]
-# Use "azure account list" with the azure CLI to get these values.
-tenant_id = 00000000-0000-0000-0000-000000000000
-subscription_id = 00000000-0000-0000-0000-000000000000
-
-# The following directions are based on
-# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
-# and updated for v2 of the Azure cli tool.
-#
-# az ad app create --display-name "Node Manager" --homepage "https://arvados.org" --identifier-uris "https://<Your_Application_Uri>" --password <Your_Password> --end-date <Desired_credential_expiry_date>
-# az ad sp create "<Application_Id>"
-# az role assignment create --assignee "<Application_Id>" --role Owner --resource-group "<Your_Azure_Arvados_Resource_Group>"
-#
-# Use <Application_Id> for "key" and the <Your_Password> for "secret"
-#
-key = 00000000-0000-0000-0000-000000000000
-secret = PASSWORD
-timeout = 60
-region = East US
-
-[Cloud List]
-# The resource group in which the compute node virtual machines will be created
-# and listed.
-ex_resource_group = ArvadosResourceGroup
-
-[Cloud Create]
-# The compute node image, as a link to a VHD in Azure blob store.
-image = https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zyxwv-compute-osDisk.vhd
-
-# Path to a local ssh key file that will be used to provision new nodes.
-ssh_key = /home/arvadosuser/.ssh/id_rsa.pub
-
-# The account name for the admin user that will be provisioned on new nodes.
-ex_user_name = arvadosuser
-
-# The Azure storage account that will be used to store the node OS disk images.
-ex_storage_account = arvadosstorage
-
-# The virtual network the VMs will be associated with.
-ex_network = ArvadosNetwork
-
-# Optional subnet of the virtual network.
-#ex_subnet = default
-
-# Node tags
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
-
-# the API server to ping
-ping_host = hostname:port
-
-# You can define any number of Size sections to list Azure sizes you're willing
-# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
-# in the queue.  You must also provide price per hour as the Azure driver
-# compute currently does not report prices.
-#
-# See https://azure.microsoft.com/en-us/pricing/details/virtual-machines/
-# for a list of known machine types that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores are available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.  You can also override Microsoft's provided
-# data fields by setting them here.
-
-[Size Standard_D3]
-cores = 4
-price = 0.56
-
-[Size Standard_D4]
-cores = 8
-price = 1.12
-</pre>
-
-h2. Running
-
-<pre>
-$ arvados-node-manager --config /etc/arvados-node-manager/config.ini
-</pre>
index ff8b8052e56e906ad7513560e12102d4145f2a0f..82741c3ea64f8548a9a5aaa3ce12ca3828ac44f7 100644 (file)
@@ -81,7 +81,7 @@ def get_cr_state(cr_uuid):
             return 'On hold'
         else:
             return 'Queued'
-    elif c['state'] == 'Complete' and c['exit_code'] != 0
+    elif c['state'] == 'Complete' and c['exit_code'] != 0:
         return 'Failed'
     elif c['state'] == 'Running':
         if c['runtime_status'].get('error', None):
@@ -144,7 +144,7 @@ child_requests = api.container_requests().list(filters=[
 child_containers = {c["container_uuid"]: c for c in child_requests["items"]}
 cancelled_child_containers = api.containers().list(filters=[
     ["exit_code", "!=", "0"],
-    ["uuid", "in", child_containers.keys()]], limit=1000).execute()
+    ["uuid", "in", list(child_containers.keys())]], limit=1000).execute()
 for c in cancelled_child_containers["items"]:
     print("%s (%s)" % (child_containers[c["uuid"]]["name"], child_containers[c["uuid"]]["uuid"]))
 {% endcodeblock %}
@@ -159,7 +159,8 @@ container_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 container_request = api.container_requests().get(uuid=container_request_uuid).execute()
 collection = arvados.collection.CollectionReader(container_request["log_uuid"])
 for c in collection:
-    print(collection.open(c).read())
+    if isinstance(collection.find(c), arvados.arvfile.ArvadosFile):
+        print(collection.open(c).read())
 {% endcodeblock %}
 
 h2(#sharing_link). Create a collection sharing link
diff --git a/go.mod b/go.mod
index 71052882adbeff703ae81a21900561afe15c8743..262978d9125d412b32bfee22508bcfe517de8ec6 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -4,8 +4,12 @@ go 1.13
 
 require (
        github.com/AdRoll/goamz v0.0.0-20170825154802-2731d20f46f4
-       github.com/Azure/azure-sdk-for-go v19.1.0+incompatible
-       github.com/Azure/go-autorest v10.15.2+incompatible
+       github.com/Azure/azure-sdk-for-go v45.1.0+incompatible
+       github.com/Azure/go-autorest v14.2.0+incompatible
+       github.com/Azure/go-autorest/autorest v0.11.3
+       github.com/Azure/go-autorest/autorest/azure/auth v0.5.1
+       github.com/Azure/go-autorest/autorest/to v0.4.0
+       github.com/Azure/go-autorest/autorest/validation v0.3.0 // indirect
        github.com/Microsoft/go-winio v0.4.5 // indirect
        github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 // indirect
        github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect
@@ -16,8 +20,6 @@ require (
        github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092
        github.com/coreos/go-oidc v2.1.0+incompatible
        github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7
-       github.com/dgrijalva/jwt-go v3.1.0+incompatible // indirect
-       github.com/dimchansky/utfbom v1.0.0 // indirect
        github.com/dnaeon/go-vcr v1.0.1 // indirect
        github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible // indirect
        github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1
@@ -44,7 +46,6 @@ require (
        github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect
        github.com/lib/pq v1.3.0
        github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c // indirect
-       github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 // indirect
        github.com/msteinert/pam v0.0.0-20190215180659-f29b9f28d6f9
        github.com/opencontainers/go-digest v1.0.0-rc1 // indirect
        github.com/opencontainers/image-spec v1.0.1-0.20171125024018-577479e4dc27 // indirect
@@ -57,9 +58,8 @@ require (
        github.com/sergi/go-diff v1.0.0 // indirect
        github.com/sirupsen/logrus v1.4.2
        github.com/src-d/gcfg v1.3.0 // indirect
-       github.com/stretchr/testify v1.4.0 // indirect
        github.com/xanzy/ssh-agent v0.1.0 // indirect
-       golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550
+       golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
        golang.org/x/net v0.0.0-20200202094626-16171245cfb2
        golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
        golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd
diff --git a/go.sum b/go.sum
index 2565964e7d45121d76e59e0c7b5e21743beaaa28..85d205112fb95ecf1895d96122686e0e2e2a849b 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -2,10 +2,40 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
 cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
 cloud.google.com/go v0.38.0 h1:ROfEUZz+Gh5pa62DJWXSaonyu3StP6EA6lPEXPI6mCo=
 cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
+github.com/Azure/azure-sdk-for-go v0.2.0-beta h1:wYBqYNMWr0WL2lcEZi+dlK9n+N0wJ0Pjs4BKeOnDjfQ=
 github.com/Azure/azure-sdk-for-go v19.1.0+incompatible h1:ysqLW+tqZjJWOTE74heH/pDRbr4vlN3yV+dqQYgpyxw=
 github.com/Azure/azure-sdk-for-go v19.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v20.2.0+incompatible h1:La3ODnagAOf5ZFUepTfVftvNTdxkq06DNpgi1l0yaM0=
+github.com/Azure/azure-sdk-for-go v20.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v45.1.0+incompatible h1:kxtaPD8n2z5Za+9e3sKsYG2IX6PG2R6VXtgS7gAbh3A=
+github.com/Azure/azure-sdk-for-go v45.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/go-autorest v1.1.1 h1:4G9tVCqooRY3vDTB2bA1Z01PlSALtnUbji0AfzthUSs=
 github.com/Azure/go-autorest v10.15.2+incompatible h1:oZpnRzZie83xGV5txbT1aa/7zpCPvURGhV6ThJij2bs=
 github.com/Azure/go-autorest v10.15.2+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
+github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
+github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
+github.com/Azure/go-autorest/autorest v0.11.0/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
+github.com/Azure/go-autorest/autorest v0.11.3 h1:fyYnmYujkIXUgv88D9/Wo2ybE4Zwd/TmQd5sSI5u2Ws=
+github.com/Azure/go-autorest/autorest v0.11.3/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
+github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg=
+github.com/Azure/go-autorest/autorest/adal v0.9.2 h1:Aze/GQeAN1RRbGmnUJvUj+tFGBzFdIg3293/A9rbxC4=
+github.com/Azure/go-autorest/autorest/adal v0.9.2/go.mod h1:/3SMAM86bP6wC9Ev35peQDUeqFZBMH07vvUOmg4z/fE=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.1 h1:bvUhZciHydpBxBmCheUgxxbSwJy7xcfjkUsjUcqSojc=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.1/go.mod h1:ea90/jvmnAwDrSooLH4sRIehEPtG/EPUXavDh31MnA4=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.0 h1:Ml+UCrnlKD+cJmSzrZ/RDcDw86NjkRUpnFh7V5JUhzU=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.0/go.mod h1:JljT387FplPzBA31vUcvsetLKF3pec5bdAxjVU4kI2s=
+github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw=
+github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=
+github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/to v0.4.0 h1:oXVqrxakqqV1UZdSazDOPOLvOIz+XA683u8EctwboHk=
+github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE=
+github.com/Azure/go-autorest/autorest/validation v0.3.0 h1:3I9AAI63HfcLtphd9g39ruUwRI+Ca+z/f36KHPFRUss=
+github.com/Azure/go-autorest/autorest/validation v0.3.0/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E=
+github.com/Azure/go-autorest/logger v0.2.0 h1:e4RVHVZKC5p6UANLJHkM4OfR1UKZPj8Wt8Pcx+3oqrE=
+github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
+github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo=
+github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
 github.com/Microsoft/go-winio v0.4.5 h1:U2XsGR5dBg1yzwSEJoP2dE2/aAXpmad+CNG2hE9Pd5k=
 github.com/Microsoft/go-winio v0.4.5/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
@@ -48,8 +78,12 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
 github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
 github.com/dgrijalva/jwt-go v3.1.0+incompatible h1:FFziAwDQQ2dz1XClWMkwvukur3evtZx7x/wMHKM1i20=
 github.com/dgrijalva/jwt-go v3.1.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
 github.com/dimchansky/utfbom v1.0.0 h1:fGC2kkf4qOoKqZ4q7iIh+Vef4ubC1c38UDsEyZynZPc=
 github.com/dimchansky/utfbom v1.0.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
+github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TRo4=
+github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
 github.com/dnaeon/go-vcr v1.0.1 h1:r8L/HqC0Hje5AXMu1ooW8oyQyOFv4GxqpL0nRP7SLLY=
 github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E=
 github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible h1:PVtvnmmxSMUcT5AY6vG7sCCzRg3eyoW6vQvXtITC60c=
@@ -78,6 +112,7 @@ github.com/go-ldap/ldap v3.0.3+incompatible h1:HTeSZO8hWMS1Rgb2Ziku6b8a7qRIZZMHj
 github.com/go-ldap/ldap v3.0.3+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc=
 github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
 github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
 github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
@@ -143,6 +178,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0j
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 h1:eQox4Rh4ewJF+mqYPxCkmBAirRnPaHEB26UkNuPyjlk=
 github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
+github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
+github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
 github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
 github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
@@ -206,6 +243,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90Pveol
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8=
 golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
 golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
@@ -226,6 +265,7 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980 h1:dfGZHvZk057jK2MCeWus/TowK
 golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859 h1:R/3boaszxrf1GEUWTVDzSKVwLmSJpwZ1yqXm8j0v2QI=
 golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI=
 golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
index e38a4775e87f799b3641ac9b50f524b6b9e2df99..3f4fb7482229bc704e9daee1d71a0775aa8ed3fa 100644 (file)
@@ -601,7 +601,7 @@ func (super *Supervisor) autofillConfig(cfg *arvados.Config) error {
                }
                if len(svc.InternalURLs) == 0 {
                        svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{
-                               arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: arvados.ServiceInstance{},
+                               {Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: {},
                        }
                }
        }
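
The hunk above applies Go's composite-literal simplification: when a map literal's type is already known, each entry may omit the repeated key and value type names (the same rewrite `gofmt -s` suggests). A minimal, self-contained sketch with stand-in types:

<pre>
package main

import "fmt"

// Stand-in types; the real code uses arvados.URL and arvados.ServiceInstance.
type URL struct{ Scheme, Host, Path string }
type ServiceInstance struct{}

func main() {
	// The map type already names the key and value types once, so each
	// entry can shrink from "URL{...}: ServiceInstance{}" to "{...}: {}".
	urls := map[URL]ServiceInstance{
		{Scheme: "http", Host: "localhost:9000", Path: "/"}: {},
	}
	fmt.Println(len(urls)) // 1
}
</pre>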
index 6de367aa251c4c034b77331befde540782dc89d5..ba8a836dd06aa6a30658a6cf5f7516ca178a815e 100644 (file)
@@ -8,6 +8,7 @@ import (
        "context"
        "encoding/base64"
        "encoding/json"
+       "errors"
        "fmt"
        "net/http"
        "regexp"
@@ -18,7 +19,7 @@ import (
 
        "git.arvados.org/arvados.git/lib/cloud"
        "git.arvados.org/arvados.git/sdk/go/arvados"
-       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
+       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
        "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
        storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
        "github.com/Azure/azure-sdk-for-go/storage"
@@ -35,20 +36,23 @@ import (
 var Driver = cloud.DriverFunc(newAzureInstanceSet)
 
 type azureInstanceSetConfig struct {
-       SubscriptionID               string
-       ClientID                     string
-       ClientSecret                 string
-       TenantID                     string
-       CloudEnvironment             string
-       ResourceGroup                string
-       Location                     string
-       Network                      string
-       NetworkResourceGroup         string
-       Subnet                       string
-       StorageAccount               string
-       BlobContainer                string
-       DeleteDanglingResourcesAfter arvados.Duration
-       AdminUsername                string
+       SubscriptionID                 string
+       ClientID                       string
+       ClientSecret                   string
+       TenantID                       string
+       CloudEnvironment               string
+       ResourceGroup                  string
+       ImageResourceGroup             string
+       Location                       string
+       Network                        string
+       NetworkResourceGroup           string
+       Subnet                         string
+       StorageAccount                 string
+       BlobContainer                  string
+       SharedImageGalleryName         string
+       SharedImageGalleryImageVersion string
+       DeleteDanglingResourcesAfter   arvados.Duration
+       AdminUsername                  string
 }
 
 type containerWrapper interface {
@@ -138,6 +142,25 @@ func (cl *interfacesClientImpl) listComplete(ctx context.Context, resourceGroupN
        return r, wrapAzureError(err)
 }
 
+type disksClientWrapper interface {
+       listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error)
+       delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error)
+}
+
+type disksClientImpl struct {
+       inner compute.DisksClient
+}
+
+func (cl *disksClientImpl) listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error) {
+       r, err := cl.inner.ListByResourceGroup(ctx, resourceGroupName)
+       return r, wrapAzureError(err)
+}
+
+func (cl *disksClientImpl) delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error) {
+       r, err := cl.inner.Delete(ctx, resourceGroupName, diskName)
+       return r, wrapAzureError(err)
+}
+
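
This thin wrapper follows the pattern already used for the VM and NIC clients: production code wraps the SDK client so every call goes through wrapAzureError, while tests can substitute any type satisfying disksClientWrapper. A hypothetical stub (not part of this commit) could look like:

<pre>
// disksClientStub is an illustrative test double for disksClientWrapper.
type disksClientStub struct{}

func (*disksClientStub) listByResourceGroup(ctx context.Context, resourceGroupName string) (compute.DiskListPage, error) {
	// A zero-value page reports NotDone() == false, so manageDisks
	// sees an empty resource group and deletes nothing.
	return compute.DiskListPage{}, nil
}

func (*disksClientStub) delete(ctx context.Context, resourceGroupName string, diskName string) (compute.DisksDeleteFuture, error) {
	return compute.DisksDeleteFuture{}, nil
}
</pre>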
 var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`)
 
 type azureRateLimitError struct {
@@ -196,20 +219,23 @@ func wrapAzureError(err error) error {
 }
 
 type azureInstanceSet struct {
-       azconfig     azureInstanceSetConfig
-       vmClient     virtualMachinesClientWrapper
-       netClient    interfacesClientWrapper
-       blobcont     containerWrapper
-       azureEnv     azure.Environment
-       interfaces   map[string]network.Interface
-       dispatcherID string
-       namePrefix   string
-       ctx          context.Context
-       stopFunc     context.CancelFunc
-       stopWg       sync.WaitGroup
-       deleteNIC    chan string
-       deleteBlob   chan storage.Blob
-       logger       logrus.FieldLogger
+       azconfig           azureInstanceSetConfig
+       vmClient           virtualMachinesClientWrapper
+       netClient          interfacesClientWrapper
+       disksClient        disksClientWrapper
+       imageResourceGroup string
+       blobcont           containerWrapper
+       azureEnv           azure.Environment
+       interfaces         map[string]network.Interface
+       dispatcherID       string
+       namePrefix         string
+       ctx                context.Context
+       stopFunc           context.CancelFunc
+       stopWg             sync.WaitGroup
+       deleteNIC          chan string
+       deleteBlob         chan storage.Blob
+       deleteDisk         chan compute.Disk
+       logger             logrus.FieldLogger
 }
 
 func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
@@ -233,6 +259,7 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
        az.azconfig = azcfg
        vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID)
        netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID)
+       disksClient := compute.NewDisksClient(az.azconfig.SubscriptionID)
        storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID)
 
        az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnvironment)
@@ -253,26 +280,38 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
 
        vmClient.Authorizer = authorizer
        netClient.Authorizer = authorizer
+       disksClient.Authorizer = authorizer
        storageAcctClient.Authorizer = authorizer
 
        az.vmClient = &virtualMachinesClientImpl{vmClient}
        az.netClient = &interfacesClientImpl{netClient}
+       az.disksClient = &disksClientImpl{disksClient}
 
-       result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't get account keys")
-               return err
+       az.imageResourceGroup = az.azconfig.ImageResourceGroup
+       if az.imageResourceGroup == "" {
+               az.imageResourceGroup = az.azconfig.ResourceGroup
        }
 
-       key1 := *(*result.Keys)[0].Value
-       client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't make client")
-               return err
-       }
+       var client storage.Client
+       if az.azconfig.StorageAccount != "" && az.azconfig.BlobContainer != "" {
+               result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
+               if err != nil {
+                       az.logger.WithError(err).Warn("Couldn't get account keys")
+                       return err
+               }
 
-       blobsvc := client.GetBlobService()
-       az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
+               key1 := *(*result.Keys)[0].Value
+               client, err = storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
+               if err != nil {
+                       az.logger.WithError(err).Warn("Couldn't make client")
+                       return err
+               }
+
+               blobsvc := client.GetBlobService()
+               az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
+       } else if az.azconfig.StorageAccount != "" || az.azconfig.BlobContainer != "" {
+               az.logger.Error("Invalid configuration: StorageAccount and BlobContainer must both be empty or both be set")
+       }
 
        az.dispatcherID = dispatcherID
        az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID)
@@ -288,21 +327,21 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
                                tk.Stop()
                                return
                        case <-tk.C:
-                               az.manageBlobs()
+                               if az.blobcont != nil {
+                                       az.manageBlobs()
+                               }
+                               az.manageDisks()
                        }
                }
        }()
 
        az.deleteNIC = make(chan string)
        az.deleteBlob = make(chan storage.Blob)
+       az.deleteDisk = make(chan compute.Disk)
 
        for i := 0; i < 4; i++ {
                go func() {
-                       for {
-                               nicname, ok := <-az.deleteNIC
-                               if !ok {
-                                       return
-                               }
+                       for nicname := range az.deleteNIC {
                                _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, nicname)
                                if delerr != nil {
                                        az.logger.WithError(delerr).Warnf("Error deleting %v", nicname)
@@ -312,11 +351,7 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
                        }
                }()
                go func() {
-                       for {
-                               blob, ok := <-az.deleteBlob
-                               if !ok {
-                                       return
-                               }
+                       for blob := range az.deleteBlob {
                                err := blob.Delete(nil)
                                if err != nil {
                                        az.logger.WithError(err).Warnf("Error deleting %v", blob.Name)
@@ -325,11 +360,28 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
                                }
                        }
                }()
+               go func() {
+                       for disk := range az.deleteDisk {
+                               _, err := az.disksClient.delete(az.ctx, az.imageResourceGroup, *disk.Name)
+                               if err != nil {
+                                       az.logger.WithError(err).Warnf("Error deleting disk %+v", *disk.Name)
+                               } else {
+                                       az.logger.Printf("Deleted disk %v", *disk.Name)
+                               }
+                       }
+               }()
        }
 
        return nil
 }
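
The worker goroutines above were also tightened: a manual receive with an ok-check becomes `for ... range` over the channel, which exits on its own once Stop() closes the channel. The same pattern in a runnable sketch:

<pre>
package main

import (
	"fmt"
	"sync"
)

func main() {
	deleteNIC := make(chan string)
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Ends when deleteNIC is closed and drained, replacing
		// "nicname, ok := <-deleteNIC; if !ok { return }".
		for nicname := range deleteNIC {
			fmt.Println("deleting", nicname)
		}
	}()
	deleteNIC <- "compute-zzzzz-nic"
	close(deleteNIC) // what azureInstanceSet.Stop() does for real
	wg.Wait()
}
</pre>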
 
+func (az *azureInstanceSet) cleanupNic(nic network.Interface) {
+       _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
+       if delerr != nil {
+               az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
+       }
+}
+
 func (az *azureInstanceSet) Create(
        instanceType arvados.InstanceType,
        imageID cloud.ImageID,
@@ -389,14 +441,55 @@ func (az *azureInstanceSet) Create(
                return nil, wrapAzureError(err)
        }
 
-       blobname := fmt.Sprintf("%s-os.vhd", name)
-       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
-               az.azconfig.StorageAccount,
-               az.azureEnv.StorageEndpointSuffix,
-               az.azconfig.BlobContainer,
-               blobname)
-
+       var blobname string
        customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))
+       var storageProfile *compute.StorageProfile
+
+       re := regexp.MustCompile(`^http(s?)://`)
+       if re.MatchString(string(imageID)) {
+               if az.blobcont == nil {
+                       az.cleanupNic(nic)
+                       return nil, wrapAzureError(errors.New("Invalid configuration: can't configure unmanaged image URL without StorageAccount and BlobContainer"))
+               }
+               blobname = fmt.Sprintf("%s-os.vhd", name)
+               instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
+                       az.azconfig.StorageAccount,
+                       az.azureEnv.StorageEndpointSuffix,
+                       az.azconfig.BlobContainer,
+                       blobname)
+               az.logger.Warn("using deprecated unmanaged image, see https://doc.arvados.org/ to migrate to managed disks")
+               storageProfile = &compute.StorageProfile{
+                       OsDisk: &compute.OSDisk{
+                               OsType:       compute.Linux,
+                               Name:         to.StringPtr(name + "-os"),
+                               CreateOption: compute.DiskCreateOptionTypesFromImage,
+                               Image: &compute.VirtualHardDisk{
+                                       URI: to.StringPtr(string(imageID)),
+                               },
+                               Vhd: &compute.VirtualHardDisk{
+                                       URI: &instanceVhd,
+                               },
+                       },
+               }
+       } else {
+               id := to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/images/" + string(imageID))
+               if az.azconfig.SharedImageGalleryName != "" && az.azconfig.SharedImageGalleryImageVersion != "" {
+                       id = to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/galleries/" + az.azconfig.SharedImageGalleryName + "/images/" + string(imageID) + "/versions/" + az.azconfig.SharedImageGalleryImageVersion)
+               } else if az.azconfig.SharedImageGalleryName != "" || az.azconfig.SharedImageGalleryImageVersion != "" {
+                       az.cleanupNic(nic)
+                       return nil, wrapAzureError(errors.New("Invalid configuration: SharedImageGalleryName and SharedImageGalleryImageVersion must both be set or both be empty"))
+               }
+               storageProfile = &compute.StorageProfile{
+                       ImageReference: &compute.ImageReference{
+                               ID: id,
+                       },
+                       OsDisk: &compute.OSDisk{
+                               OsType:       compute.Linux,
+                               Name:         to.StringPtr(name + "-os"),
+                               CreateOption: compute.DiskCreateOptionTypesFromImage,
+                       },
+               }
+       }
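
The branch above accepts three forms of ImageID: an http(s) URL keeps the deprecated unmanaged-VHD behavior, a bare name references a managed disk image, and a bare name combined with both SharedImageGallery fields references a gallery image version. A hypothetical helper condensing the managed-image ID construction (string layout copied from the code above):

<pre>
// imageRefID is illustrative only, not part of this commit.
func imageRefID(subscriptionID, imageResourceGroup, galleryName, galleryVersion, imageID string) string {
	base := "/subscriptions/" + subscriptionID + "/resourceGroups/" + imageResourceGroup
	if galleryName != "" && galleryVersion != "" {
		// shared image gallery: .../galleries/<name>/images/<image>/versions/<version>
		return base + "/providers/Microsoft.Compute/galleries/" + galleryName +
			"/images/" + imageID + "/versions/" + galleryVersion
	}
	// managed disk image: .../images/<image>
	return base + "/providers/Microsoft.Compute/images/" + imageID
}
</pre>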
 
        vmParameters := compute.VirtualMachine{
                Location: &az.azconfig.Location,
@@ -405,19 +498,7 @@ func (az *azureInstanceSet) Create(
                        HardwareProfile: &compute.HardwareProfile{
                                VMSize: compute.VirtualMachineSizeTypes(instanceType.ProviderType),
                        },
-                       StorageProfile: &compute.StorageProfile{
-                               OsDisk: &compute.OSDisk{
-                                       OsType:       compute.Linux,
-                                       Name:         to.StringPtr(name + "-os"),
-                                       CreateOption: compute.FromImage,
-                                       Image: &compute.VirtualHardDisk{
-                                               URI: to.StringPtr(string(imageID)),
-                                       },
-                                       Vhd: &compute.VirtualHardDisk{
-                                               URI: &instanceVhd,
-                                       },
-                               },
-                       },
+                       StorageProfile: storageProfile,
                        NetworkProfile: &compute.NetworkProfile{
                                NetworkInterfaces: &[]compute.NetworkInterfaceReference{
                                        compute.NetworkInterfaceReference{
@@ -449,15 +530,21 @@ func (az *azureInstanceSet) Create(
 
        vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
        if err != nil {
-               _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
-               if delerr != nil {
-                       az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+               // Do some cleanup. Otherwise, an unbounded number of new unused nics and
+               // blobs can pile up during times when VMs can't be created and the
+               // dispatcher keeps retrying, because the garbage collection in manageBlobs
+               // and manageNics is only triggered periodically. This is most important
+               // for nics, because those are subject to a quota.
+               az.cleanupNic(nic)
+
+               if blobname != "" {
+                       _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
+                       if delerr != nil {
+                               az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+                       }
                }
 
-               _, delerr = az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
-               if delerr != nil {
-                       az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
-               }
+               // Leave cleaning up of managed disks to the garbage collection in manageDisks()
 
                return nil, wrapAzureError(err)
        }
@@ -497,7 +584,7 @@ func (az *azureInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, err
        return instances, nil
 }
 
-// ManageNics returns a list of Azure network interface resources.
+// manageNics returns a list of Azure network interface resources.
 // Also performs garbage collection of NICs which have "namePrefix",
 // are not associated with a virtual machine and have a "created-at"
 // time more than DeleteDanglingResourcesAfter (to prevent racing and
@@ -538,7 +625,7 @@ func (az *azureInstanceSet) manageNics() (map[string]network.Interface, error) {
        return interfaces, nil
 }
 
-// ManageBlobs garbage collects blobs (VM disk images) in the
+// manageBlobs garbage collects blobs (VM disk images) in the
 // configured storage account container.  It will delete blobs which
 // have "namePrefix", are "available" (which means they are not
 // leased to a VM) and haven't been modified for
@@ -573,11 +660,45 @@ func (az *azureInstanceSet) manageBlobs() {
        }
 }
 
+// manageDisks garbage collects managed compute disks (VM disk images) in the
+// configured resource group.  It will delete disks which have "namePrefix",
+// are "unattached" (which means they are not leased to a VM) and were created
+// more than DeleteDanglingResourcesAfter seconds ago.  (Azure provides no
+// modification timestamp on managed disks; only a creation timestamp is available.)
+func (az *azureInstanceSet) manageDisks() {
+
+       re := regexp.MustCompile(`^` + regexp.QuoteMeta(az.namePrefix) + `.*-os$`)
+       threshold := time.Now().Add(-az.azconfig.DeleteDanglingResourcesAfter.Duration())
+
+       response, err := az.disksClient.listByResourceGroup(az.ctx, az.imageResourceGroup)
+       if err != nil {
+               az.logger.WithError(err).Warn("Error listing disks")
+               return
+       }
+
+       for ; response.NotDone(); err = response.Next() {
+               if err != nil {
+                       az.logger.WithError(err).Warn("Error getting next page of disks")
+                       return
+               }
+               for _, d := range response.Values() {
+                       if d.DiskProperties.DiskState == compute.Unattached &&
+                               d.Name != nil && re.MatchString(*d.Name) &&
+                               d.DiskProperties.TimeCreated.ToTime().Before(threshold) {
+
+                               az.logger.Printf("Disk %v is unattached and was created at %+v, will delete", *d.Name, d.DiskProperties.TimeCreated.ToTime())
+                               az.deleteDisk <- d
+                       }
+               }
+       }
+}
+
 func (az *azureInstanceSet) Stop() {
        az.stopFunc()
        az.stopWg.Wait()
        close(az.deleteNIC)
        close(az.deleteBlob)
+       close(az.deleteDisk)
 }
 
 type azureInstance struct {
index 94af0b9a26dc8c7587b0d5e87bba013216c3f266..7b5a34df59798b781222cf52131fee0d1e7eade0 100644 (file)
@@ -47,7 +47,7 @@ import (
        "git.arvados.org/arvados.git/lib/dispatchcloud/test"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/config"
-       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
+       "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
        "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
        "github.com/Azure/azure-sdk-for-go/storage"
        "github.com/Azure/go-autorest/autorest"
@@ -156,6 +156,7 @@ func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error)
                logger:       logrus.StandardLogger(),
                deleteNIC:    make(chan string),
                deleteBlob:   make(chan storage.Blob),
+               deleteDisk:   make(chan compute.Disk),
        }
        ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
        ap.vmClient = &VirtualMachinesClientStub{}
index f2e27952e19fd1cde6caccd19b41a20411fb73ee..80294afaf35f1f701928f0c1ce99c3c09ca35e09 100644 (file)
@@ -139,9 +139,6 @@ Clusters:
       Workbench2:
         InternalURLs: {}
         ExternalURL: ""
-      Nodemanager:
-        InternalURLs: {}
-        ExternalURL: "-"
       Health:
         InternalURLs: {}
         ExternalURL: "-"
@@ -486,6 +483,9 @@ Clusters:
       # Use of this feature is not recommended, if it can be avoided.
       ForwardSlashNameSubstitution: ""
 
+      # Include "folder objects" in S3 ListObjects responses.
+      S3FolderObjects: true
+
       # Managed collection properties. At creation time, if the client didn't
       # provide the listed keys, they will be automatically populated following
       # one of the following behaviors:
@@ -689,6 +689,16 @@ Clusters:
         ProviderAppID: ""
         ProviderAppSecret: ""
 
+      Test:
+        # Authenticate users listed here in the config file. This
+        # feature is intended to be used in test environments, and
+        # should not be used in production.
+        Enable: false
+        Users:
+          SAMPLE:
+            Email: alice@example.com
+            Password: xyzzy
+
       # The cluster ID to delegate the user database.  When set,
       # logins on this cluster will be redirected to the login cluster
       # (login cluster must appear in RemoteClusters with Proxy: true)
@@ -698,6 +708,11 @@ Clusters:
       # remain valid before it needs to be revalidated.
       RemoteTokenRefresh: 5m
 
+      # How long a client token created from a login flow will be valid without
+      # asking the user to re-login. Example values: 60m, 8h.
+      # Default value zero means tokens don't have expiration.
+      TokenLifetime: 0s
+
     Git:
       # Path to git or gitolite-shell executable. Each authenticated
       # request will execute this program with the single argument "http-backend"
@@ -952,6 +967,12 @@ Clusters:
         TimeoutShutdown: 10s
 
         # Worker VM image ID.
+        # (aws) AMI identifier
+        # (azure) managed disks: the name of the managed disk image
+        # (azure) shared image gallery: the name of the image definition. Also
+        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
+        # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
+        # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
 
         # An executable file (located on the dispatcher host) to be
@@ -1020,7 +1041,16 @@ Clusters:
           Network: ""
           Subnet: ""
 
-          # (azure) Where to store the VM VHD blobs
+          # (azure) managed disks: The resource group where the managed disk
+          # image can be found (if different from ResourceGroup).
+          ImageResourceGroup: ""
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: ""
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: ""
+
+          # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
           StorageAccount: ""
           BlobContainer: ""
 
index 1be7208ee38facce00e71f2cfdf07885ccffde08..0552b66adb80bed162ee3f2518a1c649c0c89ec2 100644 (file)
@@ -43,7 +43,6 @@ type nodeProfile struct {
        Keepproxy     systemServiceInstance `json:"keepproxy"`
        Keepstore     systemServiceInstance `json:"keepstore"`
        Keepweb       systemServiceInstance `json:"keep-web"`
-       Nodemanager   systemServiceInstance `json:"arvados-node-manager"`
        DispatchCloud systemServiceInstance `json:"arvados-dispatch-cloud"`
        RailsAPI      systemServiceInstance `json:"arvados-api-server"`
        Websocket     systemServiceInstance `json:"arvados-ws"`
index d6b02b750de122582e35a5aa34b508861106ac40..b203dff26a1b80272be29f41dd747c8108beb272 100644 (file)
@@ -59,10 +59,10 @@ func ExportJSON(w io.Writer, cluster *arvados.Cluster) error {
 // exists.
 var whitelist = map[string]bool{
        // | sort -t'"' -k2,2
-       "ClusterID":                                    true,
        "API":                                          true,
        "API.AsyncPermissionsUpdateInterval":           false,
        "API.DisabledAPIs":                             false,
+       "API.KeepServiceRequestTimeout":                false,
        "API.MaxConcurrentRequests":                    false,
        "API.MaxIndexDatabaseRead":                     false,
        "API.MaxItemsPerResponse":                      true,
@@ -71,24 +71,29 @@ var whitelist = map[string]bool{
        "API.MaxRequestSize":                           true,
        "API.RailsSessionSecretToken":                  false,
        "API.RequestTimeout":                           true,
-       "API.WebsocketClientEventQueue":                false,
        "API.SendTimeout":                              true,
+       "API.WebsocketClientEventQueue":                false,
        "API.WebsocketServerEventQueue":                false,
-       "API.KeepServiceRequestTimeout":                false,
        "AuditLogs":                                    false,
        "AuditLogs.MaxAge":                             false,
        "AuditLogs.MaxDeleteBatch":                     false,
        "AuditLogs.UnloggedAttributes":                 false,
+       "ClusterID":                                    true,
        "Collections":                                  true,
+       "Collections.BalanceCollectionBatch":           false,
+       "Collections.BalanceCollectionBuffers":         false,
+       "Collections.BalancePeriod":                    false,
+       "Collections.BalanceTimeout":                   false,
+       "Collections.BlobDeleteConcurrency":            false,
+       "Collections.BlobMissingReport":                false,
+       "Collections.BlobReplicateConcurrency":         false,
        "Collections.BlobSigning":                      true,
        "Collections.BlobSigningKey":                   false,
        "Collections.BlobSigningTTL":                   true,
        "Collections.BlobTrash":                        false,
-       "Collections.BlobTrashLifetime":                false,
-       "Collections.BlobTrashConcurrency":             false,
        "Collections.BlobTrashCheckInterval":           false,
-       "Collections.BlobDeleteConcurrency":            false,
-       "Collections.BlobReplicateConcurrency":         false,
+       "Collections.BlobTrashConcurrency":             false,
+       "Collections.BlobTrashLifetime":                false,
        "Collections.CollectionVersioning":             false,
        "Collections.DefaultReplication":               true,
        "Collections.DefaultTrashLifetime":             true,
@@ -97,18 +102,14 @@ var whitelist = map[string]bool{
        "Collections.ManagedProperties.*":              true,
        "Collections.ManagedProperties.*.*":            true,
        "Collections.PreserveVersionIfIdle":            true,
+       "Collections.S3FolderObjects":                  true,
        "Collections.TrashSweepInterval":               false,
        "Collections.TrustAllContent":                  false,
        "Collections.WebDAVCache":                      false,
-       "Collections.BalanceCollectionBatch":           false,
-       "Collections.BalancePeriod":                    false,
-       "Collections.BalanceTimeout":                   false,
-       "Collections.BlobMissingReport":                false,
-       "Collections.BalanceCollectionBuffers":         false,
        "Containers":                                   true,
        "Containers.CloudVMs":                          false,
-       "Containers.CrunchRunCommand":                  false,
        "Containers.CrunchRunArgumentsList":            false,
+       "Containers.CrunchRunCommand":                  false,
        "Containers.DefaultKeepCacheRAM":               true,
        "Containers.DispatchPrivateKey":                false,
        "Containers.JobsAPI":                           true,
@@ -155,28 +156,32 @@ var whitelist = map[string]bool{
        "Login.OpenIDConnect":                          true,
        "Login.OpenIDConnect.ClientID":                 false,
        "Login.OpenIDConnect.ClientSecret":             false,
-       "Login.OpenIDConnect.Enable":                   true,
-       "Login.OpenIDConnect.Issuer":                   false,
        "Login.OpenIDConnect.EmailClaim":               false,
        "Login.OpenIDConnect.EmailVerifiedClaim":       false,
+       "Login.OpenIDConnect.Enable":                   true,
+       "Login.OpenIDConnect.Issuer":                   false,
        "Login.OpenIDConnect.UsernameClaim":            false,
        "Login.PAM":                                    true,
        "Login.PAM.DefaultEmailDomain":                 false,
        "Login.PAM.Enable":                             true,
        "Login.PAM.Service":                            false,
+       "Login.RemoteTokenRefresh":                     true,
        "Login.SSO":                                    true,
        "Login.SSO.Enable":                             true,
        "Login.SSO.ProviderAppID":                      false,
        "Login.SSO.ProviderAppSecret":                  false,
-       "Login.RemoteTokenRefresh":                     true,
+       "Login.Test":                                   true,
+       "Login.Test.Enable":                            true,
+       "Login.Test.Users":                             false,
+       "Login.TokenLifetime":                          false,
        "Mail":                                         true,
+       "Mail.EmailFrom":                               false,
+       "Mail.IssueReporterEmailFrom":                  false,
+       "Mail.IssueReporterEmailTo":                    false,
        "Mail.MailchimpAPIKey":                         false,
        "Mail.MailchimpListID":                         false,
        "Mail.SendUserSetupNotificationEmail":          false,
-       "Mail.IssueReporterEmailFrom":                  false,
-       "Mail.IssueReporterEmailTo":                    false,
        "Mail.SupportEmailAddress":                     true,
-       "Mail.EmailFrom":                               false,
        "ManagementToken":                              false,
        "PostgreSQL":                                   false,
        "RemoteClusters":                               true,
@@ -194,8 +199,8 @@ var whitelist = map[string]bool{
        "SystemRootToken":                              false,
        "TLS":                                          false,
        "Users":                                        true,
-       "Users.AnonymousUserToken":                     true,
        "Users.AdminNotifierEmailFrom":                 false,
+       "Users.AnonymousUserToken":                     true,
        "Users.AutoAdminFirstUser":                     false,
        "Users.AutoAdminUserWithEmail":                 false,
        "Users.AutoSetupNewUsers":                      false,
@@ -232,6 +237,7 @@ var whitelist = map[string]bool{
        "Workbench.EnableGettingStartedPopup":          true,
        "Workbench.EnablePublicProjectsPage":           true,
        "Workbench.FileViewersConfigURL":               true,
+       "Workbench.InactivePageHTML":                   true,
        "Workbench.LogViewerMaxBytes":                  true,
        "Workbench.MultiSiteSearch":                    true,
        "Workbench.ProfilingEnabled":                   true,
@@ -243,6 +249,8 @@ var whitelist = map[string]bool{
        "Workbench.ShowUserAgreementInline":            true,
        "Workbench.ShowUserNotifications":              true,
        "Workbench.SiteName":                           true,
+       "Workbench.SSHHelpHostSuffix":                  true,
+       "Workbench.SSHHelpPageHTML":                    true,
        "Workbench.Theme":                              true,
        "Workbench.UserProfileFormFields":              true,
        "Workbench.UserProfileFormFields.*":            true,
@@ -251,9 +259,6 @@ var whitelist = map[string]bool{
        "Workbench.UserProfileFormMessage":             true,
        "Workbench.VocabularyURL":                      true,
        "Workbench.WelcomePageHTML":                    true,
-       "Workbench.InactivePageHTML":                   true,
-       "Workbench.SSHHelpPageHTML":                    true,
-       "Workbench.SSHHelpHostSuffix":                  true,
 }
 
 func redactUnsafe(m map[string]interface{}, mPrefix, lookupPrefix string) error {
index 81d402a6a1cb84db3be61bed1320903a6205a9db..57204cf36a2dbe49731c2d7cc32ad51f09522f0a 100644 (file)
@@ -145,9 +145,6 @@ Clusters:
       Workbench2:
         InternalURLs: {}
         ExternalURL: ""
-      Nodemanager:
-        InternalURLs: {}
-        ExternalURL: "-"
       Health:
         InternalURLs: {}
         ExternalURL: "-"
@@ -492,6 +489,9 @@ Clusters:
       # Use of this feature is not recommended, if it can be avoided.
       ForwardSlashNameSubstitution: ""
 
+      # Include "folder objects" in S3 ListObjects responses.
+      S3FolderObjects: true
+
       # Managed collection properties. At creation time, if the client didn't
       # provide the listed keys, they will be automatically populated following
       # one of the following behaviors:
@@ -695,6 +695,16 @@ Clusters:
         ProviderAppID: ""
         ProviderAppSecret: ""
 
+      Test:
+        # Authenticate users listed here in the config file. This
+        # feature is intended to be used in test environments, and
+        # should not be used in production.
+        Enable: false
+        Users:
+          SAMPLE:
+            Email: alice@example.com
+            Password: xyzzy
+
       # The cluster ID to delegate the user database.  When set,
       # logins on this cluster will be redirected to the login cluster
       # (login cluster must appear in RemoteClusters with Proxy: true)
@@ -704,6 +714,11 @@ Clusters:
       # remain valid before it needs to be revalidated.
       RemoteTokenRefresh: 5m
 
+      # How long a client token created from a login flow will be valid without
+      # asking the user to re-login. Example values: 60m, 8h.
+      # Default value zero means tokens don't have expiration.
+      TokenLifetime: 0s
+
     Git:
       # Path to git or gitolite-shell executable. Each authenticated
       # request will execute this program with the single argument "http-backend"
@@ -958,6 +973,12 @@ Clusters:
         TimeoutShutdown: 10s
 
         # Worker VM image ID.
+        # (aws) AMI identifier
+        # (azure) managed disks: the name of the managed disk image
+        # (azure) shared image gallery: the name of the image definition. Also
+        # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
+        # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
+        # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
         ImageID: ""
 
         # An executable file (located on the dispatcher host) to be
@@ -1026,7 +1047,16 @@ Clusters:
           Network: ""
           Subnet: ""
 
-          # (azure) Where to store the VM VHD blobs
+          # (azure) managed disks: The resource group where the managed disk
+          # image can be found (if different from ResourceGroup).
+          ImageResourceGroup: ""
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: ""
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: ""
+
+          # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
           StorageAccount: ""
           BlobContainer: ""
 
index 256afc8e6b9482d53eaa520927f62761a1f71b03..5079b402b7208d59bb78c0420b2da547a408b717 100644 (file)
@@ -38,7 +38,7 @@ func (s *FederationSuite) SetUpTest(c *check.C) {
                ClusterID:       "aaaaa",
                SystemRootToken: arvadostest.SystemRootToken,
                RemoteClusters: map[string]arvados.RemoteCluster{
-                       "aaaaa": arvados.RemoteCluster{
+                       "aaaaa": {
                                Host: os.Getenv("ARVADOS_API_HOST"),
                        },
                },
index e742bbc59b08a3a01a8302fcadb2cda6042cded9..2dd1d816e060a752fb8e71d4eeaacc5d0b3cfb9b 100644 (file)
@@ -137,7 +137,7 @@ func (h *Handler) db(ctx context.Context) (*sqlx.DB, error) {
                db.SetMaxOpenConns(p)
        }
        if err := db.Ping(); err != nil {
-               ctxlog.FromContext(ctx).WithError(err).Error("postgresql connect scuceeded but ping failed")
+               ctxlog.FromContext(ctx).WithError(err).Error("postgresql connect succeeded but ping failed")
                return nil, errDBConnection
        }
        h.pgdb = db
index ee1ea56924c5700d25e43262347d1045d534ca5c..12674148426133fa97f1ef786b38cf6803b1c376 100644 (file)
@@ -33,8 +33,13 @@ func chooseLoginController(cluster *arvados.Cluster, railsProxy *railsProxy) log
        wantSSO := cluster.Login.SSO.Enable
        wantPAM := cluster.Login.PAM.Enable
        wantLDAP := cluster.Login.LDAP.Enable
+       wantTest := cluster.Login.Test.Enable
        switch {
-       case wantGoogle && !wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
+       case 1 != countTrue(wantGoogle, wantOpenIDConnect, wantSSO, wantPAM, wantLDAP, wantTest):
+               return errorLoginController{
+                       error: errors.New("configuration problem: exactly one of Login.Google, Login.OpenIDConnect, Login.SSO, Login.PAM, Login.LDAP, and Login.Test must be enabled"),
+               }
+       case wantGoogle:
                return &oidcLoginController{
                        Cluster:            cluster,
                        RailsProxy:         railsProxy,
@@ -45,7 +50,7 @@ func chooseLoginController(cluster *arvados.Cluster, railsProxy *railsProxy) log
                        EmailClaim:         "email",
                        EmailVerifiedClaim: "email_verified",
                }
-       case !wantGoogle && wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
+       case wantOpenIDConnect:
                return &oidcLoginController{
                        Cluster:            cluster,
                        RailsProxy:         railsProxy,
@@ -56,17 +61,29 @@ func chooseLoginController(cluster *arvados.Cluster, railsProxy *railsProxy) log
                        EmailVerifiedClaim: cluster.Login.OpenIDConnect.EmailVerifiedClaim,
                        UsernameClaim:      cluster.Login.OpenIDConnect.UsernameClaim,
                }
-       case !wantGoogle && !wantOpenIDConnect && wantSSO && !wantPAM && !wantLDAP:
+       case wantSSO:
                return &ssoLoginController{railsProxy}
-       case !wantGoogle && !wantOpenIDConnect && !wantSSO && wantPAM && !wantLDAP:
+       case wantPAM:
                return &pamLoginController{Cluster: cluster, RailsProxy: railsProxy}
-       case !wantGoogle && !wantOpenIDConnect && !wantSSO && !wantPAM && wantLDAP:
+       case wantLDAP:
                return &ldapLoginController{Cluster: cluster, RailsProxy: railsProxy}
+       case wantTest:
+               return &testLoginController{Cluster: cluster, RailsProxy: railsProxy}
        default:
                return errorLoginController{
-                       error: errors.New("configuration problem: exactly one of Login.Google, Login.OpenIDConnect, Login.SSO, Login.PAM, and Login.LDAP must be enabled"),
+                       error: errors.New("BUG: missing case in login controller setup switch"),
+               }
+       }
+}
+
+func countTrue(vals ...bool) int {
+       n := 0
+       for _, val := range vals {
+               if val {
+                       n++
                }
        }
+       return n
 }
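
With countTrue, the first switch case rejects a misconfiguration in either direction, zero providers enabled or more than one, where the old code spelled out every boolean combination. For example (a sketch, assuming the definitions above):

<pre>
// sketch, assuming countTrue as defined above
fmt.Println(countTrue())                  // 0: nothing enabled -> config error
fmt.Println(countTrue(true, true, false)) // 2: ambiguous -> config error
fmt.Println(countTrue(true, false))       // 1: exactly one -> proceed
</pre>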
 
 // Login and Logout are passed through to the wrapped railsProxy;
index 700d757c274d707c703ad0c58dbac812440a45a6..bce1ecfcf260696247bb83de3cdce2fa9d27cabe 100644 (file)
@@ -64,7 +64,7 @@ func (s *LDAPSuite) SetUpSuite(c *check.C) {
                                                return []*godap.LDAPSimpleSearchResultEntry{}
                                        }
                                        return []*godap.LDAPSimpleSearchResultEntry{
-                                               &godap.LDAPSimpleSearchResultEntry{
+                                               {
                                                        DN: "cn=" + req.FilterValue + "," + req.BaseDN,
                                                        Attrs: map[string]interface{}{
                                                                "SN":   req.FilterValue,
diff --git a/lib/controller/localdb/login_testuser.go b/lib/controller/localdb/login_testuser.go
new file mode 100644 (file)
index 0000000..5a3d803
--- /dev/null
@@ -0,0 +1,45 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+       "context"
+       "errors"
+       "fmt"
+
+       "git.arvados.org/arvados.git/lib/controller/rpc"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/sirupsen/logrus"
+)
+
+type testLoginController struct {
+       Cluster    *arvados.Cluster
+       RailsProxy *railsProxy
+}
+
+func (ctrl *testLoginController) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) {
+       return noopLogout(ctrl.Cluster, opts)
+}
+
+func (ctrl *testLoginController) Login(ctx context.Context, opts arvados.LoginOptions) (arvados.LoginResponse, error) {
+       return arvados.LoginResponse{}, errors.New("interactive login is not available")
+}
+
+func (ctrl *testLoginController) UserAuthenticate(ctx context.Context, opts arvados.UserAuthenticateOptions) (arvados.APIClientAuthorization, error) {
+       for username, user := range ctrl.Cluster.Login.Test.Users {
+               if (opts.Username == username || opts.Username == user.Email) && opts.Password == user.Password {
+                       ctxlog.FromContext(ctx).WithFields(logrus.Fields{
+                               "username": username,
+                               "email":    user.Email,
+                       }).Debug("test authentication succeeded")
+                       return createAPIClientAuthorization(ctx, ctrl.RailsProxy, ctrl.Cluster.SystemRootToken, rpc.UserSessionAuthInfo{
+                               Username: username,
+                               Email:    user.Email,
+                       })
+               }
+       }
+       return arvados.APIClientAuthorization{}, fmt.Errorf("authentication failed for user %q with password len=%d", opts.Username, len(opts.Password))
+}
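The test controller only supports non-interactive authentication: Login returns an error, and UserAuthenticate matches the supplied username (or email) and password against Login.Test.Users. Assuming UserAuthenticate is routed to POST /arvados/v1/users/authenticate as with other login controllers (the routing is not part of this diff), a hedged client-side sketch:

package main

import (
        "fmt"
        "net/http"
        "net/url"
)

func main() {
        // Hypothetical cluster host and test-user credentials.
        resp, err := http.PostForm(
                "https://zzzzz.example.com/arvados/v1/users/authenticate",
                url.Values{"username": {"alice"}, "password": {"s3cret"}},
        )
        if err != nil {
                panic(err)
        }
        defer resp.Body.Close()
        // On success, the body contains the new API client
        // authorization, including its token.
        fmt.Println(resp.Status)
}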
diff --git a/lib/controller/localdb/login_testuser_test.go b/lib/controller/localdb/login_testuser_test.go
new file mode 100644 (file)
index 0000000..d2d651e
--- /dev/null
@@ -0,0 +1,94 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+       "context"
+
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/lib/controller/rpc"
+       "git.arvados.org/arvados.git/lib/ctrlctx"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/jmoiron/sqlx"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&TestUserSuite{})
+
+type TestUserSuite struct {
+       cluster  *arvados.Cluster
+       ctrl     *testLoginController
+       railsSpy *arvadostest.Proxy
+       db       *sqlx.DB
+
+       // transaction context
+       ctx      context.Context
+       rollback func() error
+}
+
+func (s *TestUserSuite) SetUpSuite(c *check.C) {
+       cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+       c.Assert(err, check.IsNil)
+       s.cluster, err = cfg.GetCluster("")
+       c.Assert(err, check.IsNil)
+       s.cluster.Login.Test.Enable = true
+       s.cluster.Login.Test.Users = map[string]arvados.TestUser{
+               "valid": {Email: "valid@example.com", Password: "v@l1d"},
+       }
+       s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+       s.ctrl = &testLoginController{
+               Cluster:    s.cluster,
+               RailsProxy: rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider),
+       }
+       s.db = arvadostest.DB(c, s.cluster)
+}
+
+func (s *TestUserSuite) SetUpTest(c *check.C) {
+       tx, err := s.db.Beginx()
+       c.Assert(err, check.IsNil)
+       s.ctx = ctrlctx.NewWithTransaction(context.Background(), tx)
+       s.rollback = tx.Rollback
+}
+
+func (s *TestUserSuite) TearDownTest(c *check.C) {
+       if s.rollback != nil {
+               s.rollback()
+       }
+}
+
+func (s *TestUserSuite) TestLogin(c *check.C) {
+       for _, trial := range []struct {
+               success  bool
+               username string
+               password string
+       }{
+               {false, "foo", "bar"},
+               {false, "", ""},
+               {false, "valid", ""},
+               {false, "", "v@l1d"},
+               {true, "valid", "v@l1d"},
+               {true, "valid@example.com", "v@l1d"},
+       } {
+               c.Logf("=== %#v", trial)
+               resp, err := s.ctrl.UserAuthenticate(s.ctx, arvados.UserAuthenticateOptions{
+                       Username: trial.username,
+                       Password: trial.password,
+               })
+               if trial.success {
+                       c.Check(err, check.IsNil)
+                       c.Check(resp.APIToken, check.Not(check.Equals), "")
+                       c.Check(resp.UUID, check.Matches, `zzzzz-gj3su-.*`)
+                       c.Check(resp.Scopes, check.DeepEquals, []string{"all"})
+
+                       authinfo := getCallbackAuthInfo(c, s.railsSpy)
+                       c.Check(authinfo.Email, check.Equals, "valid@example.com")
+                       c.Check(authinfo.AlternateEmails, check.DeepEquals, []string(nil))
+               } else {
+                       c.Check(err, check.ErrorMatches, `authentication failed.*`)
+               }
+       }
+}
index aa5f22a501331a2bdd108878987e25b133df720b..42decff31d0cada0bb83ce66ba509aa5f5d13448 100644 (file)
@@ -115,6 +115,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
                        return ChooseInstanceType(s.cluster, ctr)
                },
+               Logger: ctxlog.TestLogger(c),
        }
        for i := 0; i < 200; i++ {
                queue.Containers = append(queue.Containers, arvados.Container{
@@ -170,6 +171,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                        stubvm.CrunchRunCrashRate = 0.1
                }
        }
+       s.stubDriver.Bugf = c.Errorf
 
        start := time.Now()
        go s.disp.run()
@@ -303,7 +305,7 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
                time.Sleep(time.Millisecond)
        }
        c.Assert(len(sr.Items), check.Equals, 1)
-       c.Check(sr.Items[0].Instance, check.Matches, "stub.*")
+       c.Check(sr.Items[0].Instance, check.Matches, "inst.*")
        c.Check(sr.Items[0].WorkerState, check.Equals, "booting")
        c.Check(sr.Items[0].Price, check.Equals, 0.123)
        c.Check(sr.Items[0].LastContainerUUID, check.Equals, "")
index 4447f084a90cff2f962298c2bb3a71fef851ebb1..dddb974b326fbe7d61c280148e71f4f5c86e7abe 100644 (file)
@@ -88,6 +88,8 @@ tryrun:
                                // a higher-priority container on the
                                // same instance type. Don't let this
                                // one sneak in ahead of it.
+                       } else if sch.pool.KillContainer(ctr.UUID, "about to lock") {
+                               logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
                        } else if sch.pool.StartContainer(it, ctr) {
                                // Success.
                        } else {
index 32c6b3b24d198b90adb5f2899580783beb2dd9cb..992edddfba6370198a16def5a6b57aed18575aa4 100644 (file)
@@ -83,8 +83,9 @@ func (p *stubPool) ForgetContainer(uuid string) {
 func (p *stubPool) KillContainer(uuid, reason string) bool {
        p.Lock()
        defer p.Unlock()
-       delete(p.running, uuid)
-       return true
+       defer delete(p.running, uuid)
+       t, ok := p.running[uuid]
+       return ok && t.IsZero()
 }
 func (p *stubPool) Shutdown(arvados.InstanceType) bool {
        p.shutdowns++
index 116ca7643117d3f4df3b6e8d4e99864a44d6dfe6..fc683505f93dbae41ff42f31032dd2d145d72169 100644 (file)
@@ -109,13 +109,17 @@ func (sch *Scheduler) cancel(uuid string, reason string) {
 }
 
 func (sch *Scheduler) kill(uuid string, reason string) {
+       if !sch.uuidLock(uuid, "kill") {
+               return
+       }
+       defer sch.uuidUnlock(uuid)
        sch.pool.KillContainer(uuid, reason)
        sch.pool.ForgetContainer(uuid)
 }
 
 func (sch *Scheduler) requeue(ent container.QueueEnt, reason string) {
        uuid := ent.Container.UUID
-       if !sch.uuidLock(uuid, "cancel") {
+       if !sch.uuidLock(uuid, "requeue") {
                return
        }
        defer sch.uuidUnlock(uuid)
index 11d410fb1b9a931b8b65cb990aea1298babf7269..74b84122f286d912d8f8ef392e3eb860e6b1831d 100644 (file)
@@ -11,6 +11,7 @@ import (
 
        "git.arvados.org/arvados.git/lib/dispatchcloud/container"
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "github.com/sirupsen/logrus"
 )
 
 // Queue is a test stub for container.Queue. The caller specifies the
@@ -23,6 +24,8 @@ type Queue struct {
        // must not be nil.
        ChooseType func(*arvados.Container) (arvados.InstanceType, error)
 
+       Logger logrus.FieldLogger
+
        entries     map[string]container.QueueEnt
        updTime     time.Time
        subscribers map[<-chan struct{}]chan struct{}
@@ -166,13 +169,36 @@ func (q *Queue) Notify(upd arvados.Container) bool {
        defer q.mtx.Unlock()
        for i, ctr := range q.Containers {
                if ctr.UUID == upd.UUID {
-                       if ctr.State != arvados.ContainerStateComplete && ctr.State != arvados.ContainerStateCancelled {
+                       if allowContainerUpdate[ctr.State][upd.State] {
                                q.Containers[i] = upd
                                return true
+                       } else {
+                               if q.Logger != nil {
+                                       q.Logger.WithField("ContainerUUID", ctr.UUID).Infof("test.Queue rejected update from %s to %s", ctr.State, upd.State)
+                               }
+                               return false
                        }
-                       return false
                }
        }
        q.Containers = append(q.Containers, upd)
        return true
 }
+
+var allowContainerUpdate = map[arvados.ContainerState]map[arvados.ContainerState]bool{
+       arvados.ContainerStateQueued: map[arvados.ContainerState]bool{
+               arvados.ContainerStateQueued:    true,
+               arvados.ContainerStateLocked:    true,
+               arvados.ContainerStateCancelled: true,
+       },
+       arvados.ContainerStateLocked: map[arvados.ContainerState]bool{
+               arvados.ContainerStateQueued:    true,
+               arvados.ContainerStateLocked:    true,
+               arvados.ContainerStateRunning:   true,
+               arvados.ContainerStateCancelled: true,
+       },
+       arvados.ContainerStateRunning: map[arvados.ContainerState]bool{
+               arvados.ContainerStateRunning:   true,
+               arvados.ContainerStateCancelled: true,
+               arvados.ContainerStateComplete:  true,
+       },
+}
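The table replaces the old blanket rule (accept any update until Complete/Cancelled) with explicit legal transitions, so a crashed fake crunch-run can no longer move a Running container back to Queued through Notify. A quick illustration of the semantics, as a fragment in the same package:

// Queued -> Locked is a normal dispatch step:
fmt.Println(allowContainerUpdate[arvados.ContainerStateQueued][arvados.ContainerStateLocked]) // true

// A Running container cannot silently requeue; missing entries
// read as false:
fmt.Println(allowContainerUpdate[arvados.ContainerStateRunning][arvados.ContainerStateQueued]) // false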
index 7a1f42301684a5a042f555c3e17ccda5d2f8b6c5..f6e06d3f7cebd05a97c8851f65d8d79812d18297 100644 (file)
@@ -34,6 +34,11 @@ type StubDriver struct {
        // VM's error rate and other behaviors.
        SetupVM func(*StubVM)
 
+       // Bugf, if set, is called if a bug is detected in the caller
+       // or stub. Typically set to (*check.C).Errorf. If unset,
+       // logger.Warnf is called instead.
+       Bugf func(string, ...interface{})
+
        // StubVM's fake crunch-run uses this Queue to read and update
        // container state.
        Queue *Queue
@@ -99,6 +104,7 @@ type StubInstanceSet struct {
 
        allowCreateCall    time.Time
        allowInstancesCall time.Time
+       lastInstanceID     int
 }
 
 func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, cmd cloud.InitCommand, authKey ssh.PublicKey) (cloud.Instance, error) {
@@ -120,9 +126,10 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
        if authKey != nil {
                ak = append([]ssh.PublicKey{authKey}, ak...)
        }
+       sis.lastInstanceID++
        svm := &StubVM{
                sis:          sis,
-               id:           cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())),
+               id:           cloud.InstanceID(fmt.Sprintf("inst%d,%s", sis.lastInstanceID, it.ProviderType)),
                tags:         copyTags(tags),
                providerType: it.ProviderType,
                initCommand:  cmd,
@@ -263,49 +270,68 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader,
                })
                logger.Printf("[test] starting crunch-run stub")
                go func() {
+                       var ctr arvados.Container
+                       var started, completed bool
+                       defer func() {
+                               logger.Print("[test] exiting crunch-run stub")
+                               svm.Lock()
+                               defer svm.Unlock()
+                               if svm.running[uuid] != pid {
+                                       if !completed {
+                                               bugf := svm.sis.driver.Bugf
+                                               if bugf == nil {
+                                                       bugf = logger.Warnf
+                                               }
+                                               bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid])
+                                       }
+                               } else {
+                                       delete(svm.running, uuid)
+                               }
+                               if !completed {
+                                       logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub")
+                                       if started && svm.CrashRunningContainer != nil {
+                                               svm.CrashRunningContainer(ctr)
+                                       }
+                               }
+                       }()
+
                        crashluck := math_rand.Float64()
+                       wantCrash := crashluck < svm.CrunchRunCrashRate
+                       wantCrashEarly := crashluck < svm.CrunchRunCrashRate/2
+
                        ctr, ok := queue.Get(uuid)
                        if !ok {
                                logger.Print("[test] container not in queue")
                                return
                        }
 
-                       defer func() {
-                               if ctr.State == arvados.ContainerStateRunning && svm.CrashRunningContainer != nil {
-                                       svm.CrashRunningContainer(ctr)
-                               }
-                       }()
-
-                       if crashluck > svm.CrunchRunCrashRate/2 {
-                               time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
-                               ctr.State = arvados.ContainerStateRunning
-                               if !queue.Notify(ctr) {
-                                       ctr, _ = queue.Get(uuid)
-                                       logger.Print("[test] erroring out because state=Running update was rejected")
-                                       return
-                               }
-                       }
-
                        time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
 
                        svm.Lock()
-                       defer svm.Unlock()
-                       if svm.running[uuid] != pid {
-                               logger.Print("[test] container was killed")
+                       killed := svm.running[uuid] != pid
+                       svm.Unlock()
+                       if killed || wantCrashEarly {
                                return
                        }
-                       delete(svm.running, uuid)
 
-                       if crashluck < svm.CrunchRunCrashRate {
+                       ctr.State = arvados.ContainerStateRunning
+                       started = queue.Notify(ctr)
+                       if !started {
+                               ctr, _ = queue.Get(uuid)
+                               logger.Print("[test] erroring out because state=Running update was rejected")
+                               return
+                       }
+
+                       if wantCrash {
                                logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub")
-                       } else {
-                               if svm.ExecuteContainer != nil {
-                                       ctr.ExitCode = svm.ExecuteContainer(ctr)
-                               }
-                               logger.WithField("ExitCode", ctr.ExitCode).Print("[test] exiting crunch-run stub")
-                               ctr.State = arvados.ContainerStateComplete
-                               go queue.Notify(ctr)
+                               return
+                       }
+                       if svm.ExecuteContainer != nil {
+                               ctr.ExitCode = svm.ExecuteContainer(ctr)
                        }
+                       logger.WithField("ExitCode", ctr.ExitCode).Print("[test] completing container")
+                       ctr.State = arvados.ContainerStateComplete
+                       completed = queue.Notify(ctr)
                }()
                return 0
        }
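Together with the Bugf hook added above, a test can wire the stub so that lifecycle inconsistencies fail the test instead of merely logging a warning. A sketch mirroring the dispatcher test earlier in this commit (s.stubDriver and the gocheck *check.C value c are assumed from that context):

s.stubDriver.SetupVM = func(vm *test.StubVM) {
        vm.CrunchRunCrashRate = 0.1 // ~10% of fake crunch-run processes crash
}
s.stubDriver.Bugf = c.Errorf // stub-detected bugs fail the test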
index 88a5ceecee1dfc5f1cad8714845df9ffc3d8d5c5..4096a2eb156b39bc26a94a428342dbd77815f56a 100644 (file)
@@ -31,7 +31,7 @@ Gem::Specification.new do |s|
   s.summary     = "Arvados CLI tools"
   s.description = "Arvados command line tools, git commit #{git_hash}"
   s.authors     = ["Arvados Authors"]
-  s.email       = 'gem-dev@curoverse.com'
+  s.email       = 'gem-dev@arvados.org'
   #s.bindir      = '.'
   s.licenses    = ['Apache-2.0']
   s.files       = ["bin/arv", "bin/arv-tag", "LICENSE-2.0.txt"]
@@ -42,11 +42,12 @@ Gem::Specification.new do |s|
   # Our google-api-client dependency used to be < 0.9, but that could be
   # satisfied by the buggy 0.9.pre*.  https://dev.arvados.org/issues/9213
   s.add_runtime_dependency 'arvados-google-api-client', '~> 0.6', '>= 0.6.3', '<0.8.9'
-  s.add_runtime_dependency 'activesupport', '>= 3.2.13', '< 5.1'
+  s.add_runtime_dependency 'activesupport', '>= 3.2.13', '< 5.3'
   s.add_runtime_dependency 'json', '>= 1.7.7', '<3'
   s.add_runtime_dependency 'optimist', '~> 3.0'
   s.add_runtime_dependency 'andand', '~> 1.3', '>= 1.3.3'
-  s.add_runtime_dependency 'oj', '~> 3.0'
+  # oj 3.10.9 requires Ruby >= 2.4, which arvbox doesn't currently have because of SSO
+  s.add_runtime_dependency 'oj', '< 3.10.9'
   s.add_runtime_dependency 'curb', '~> 0.8'
   s.add_runtime_dependency 'launchy', '< 2.5'
   # arvados-google-api-client 0.8.7.2 is incompatible with faraday 0.16.2
index 9cf1ed3cd182ba8f8659b38dee81bcf0a52ab976..d98ffd18ed154c6882eb64c16f3af47bbc8c4b94 100644 (file)
@@ -121,6 +121,7 @@ type Cluster struct {
                TrashSweepInterval           Duration
                TrustAllContent              bool
                ForwardSlashNameSubstitution string
+               S3FolderObjects              bool
 
                BlobMissingReport        string
                BalancePeriod            Duration
@@ -176,8 +177,13 @@ type Cluster struct {
                        ProviderAppID     string
                        ProviderAppSecret string
                }
+               Test struct {
+                       Enable bool
+                       Users  map[string]TestUser
+               }
                LoginCluster       string
                RemoteTokenRefresh Duration
+               TokenLifetime      Duration
        }
        Mail struct {
                MailchimpAPIKey                string
@@ -314,7 +320,6 @@ type Services struct {
        Keepbalance    Service
        Keepproxy      Service
        Keepstore      Service
-       Nodemanager    Service
        RailsAPI       Service
        SSO            Service
        WebDAVDownload Service
@@ -330,6 +335,11 @@ type Service struct {
        ExternalURL  URL
 }
 
+type TestUser struct {
+       Email    string
+       Password string
+}
+
 // URL is a url.URL that is also usable as a JSON key/value.
 type URL url.URL
 
@@ -567,7 +577,6 @@ const (
        ServiceNameController    ServiceName = "arvados-controller"
        ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
        ServiceNameHealth        ServiceName = "arvados-health"
-       ServiceNameNodemanager   ServiceName = "arvados-node-manager"
        ServiceNameWorkbench1    ServiceName = "arvados-workbench1"
        ServiceNameWorkbench2    ServiceName = "arvados-workbench2"
        ServiceNameWebsocket     ServiceName = "arvados-ws"
@@ -585,7 +594,6 @@ func (svcs Services) Map() map[ServiceName]Service {
                ServiceNameController:    svcs.Controller,
                ServiceNameDispatchCloud: svcs.DispatchCloud,
                ServiceNameHealth:        svcs.Health,
-               ServiceNameNodemanager:   svcs.Nodemanager,
                ServiceNameWorkbench1:    svcs.Workbench1,
                ServiceNameWorkbench2:    svcs.Workbench2,
                ServiceNameWebsocket:     svcs.Websocket,
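The new Login.Test section is exercised by the login_testuser tests above; programmatic configuration looks like this (values illustrative):

cluster.Login.Test.Enable = true
cluster.Login.Test.Users = map[string]arvados.TestUser{
        "alice": {Email: "alice@example.com", Password: "s3cret"},
}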
index d06aba3695adc37f3d74057de2568778bcd9f9c9..5e57fed3beab3281b1d498936edb4eed813398ec 100644 (file)
@@ -31,6 +31,10 @@ var (
        ErrPermission        = os.ErrPermission
 )
 
+type syncer interface {
+       Sync() error
+}
+
 // A File is an *os.File-like interface for reading and writing files
 // in a FileSystem.
 type File interface {
@@ -299,6 +303,22 @@ func (n *treenode) Readdir() (fi []os.FileInfo, err error) {
        return
 }
 
+func (n *treenode) Sync() error {
+       n.RLock()
+       defer n.RUnlock()
+       for _, inode := range n.inodes {
+               syncer, ok := inode.(syncer)
+               if !ok {
+                       return ErrInvalidOperation
+               }
+               err := syncer.Sync()
+               if err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
 type fileSystem struct {
        root inode
        fsBackend
@@ -576,8 +596,11 @@ func (fs *fileSystem) remove(name string, recursive bool) error {
 }
 
 func (fs *fileSystem) Sync() error {
-       log.Printf("TODO: sync fileSystem")
-       return ErrInvalidOperation
+       if syncer, ok := fs.root.(syncer); ok {
+               return syncer.Sync()
+       } else {
+               return ErrInvalidOperation
+       }
 }
 
 func (fs *fileSystem) Flush(string, bool) error {
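Sync support is modeled as an optional interface: a node advertises that it can flush simply by having a Sync() error method, and callers upgrade via a type assertion, as treenode.Sync and fileSystem.Sync do above. A self-contained sketch of the pattern (names here are illustrative, not from the package):

package main

import (
        "errors"
        "fmt"
)

type syncer interface{ Sync() error }

type flushable struct{}

func (flushable) Sync() error { return nil }

type plain struct{}

// sync flushes node if it supports flushing, and reports an error
// otherwise -- the same upgrade-or-fail shape used by treenode.Sync.
func sync(node interface{}) error {
        if s, ok := node.(syncer); ok {
                return s.Sync()
        }
        return errors.New("invalid operation")
}

func main() {
        fmt.Println(sync(flushable{})) // <nil>
        fmt.Println(sync(plain{}))     // invalid operation
}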
index 37bd494914df507dc9fc193576dc9e0afcc98ea9..060b57b493cbf555d4d87d9b71db4f6171ff85b8 100644 (file)
@@ -121,6 +121,62 @@ func (fs *collectionFileSystem) newNode(name string, perm os.FileMode, modTime t
        }
 }
 
+func (fs *collectionFileSystem) Child(name string, replace func(inode) (inode, error)) (inode, error) {
+       return fs.rootnode().Child(name, replace)
+}
+
+func (fs *collectionFileSystem) FS() FileSystem {
+       return fs
+}
+
+func (fs *collectionFileSystem) FileInfo() os.FileInfo {
+       return fs.rootnode().FileInfo()
+}
+
+func (fs *collectionFileSystem) IsDir() bool {
+       return true
+}
+
+func (fs *collectionFileSystem) Lock() {
+       fs.rootnode().Lock()
+}
+
+func (fs *collectionFileSystem) Unlock() {
+       fs.rootnode().Unlock()
+}
+
+func (fs *collectionFileSystem) RLock() {
+       fs.rootnode().RLock()
+}
+
+func (fs *collectionFileSystem) RUnlock() {
+       fs.rootnode().RUnlock()
+}
+
+func (fs *collectionFileSystem) Parent() inode {
+       return fs.rootnode().Parent()
+}
+
+func (fs *collectionFileSystem) Read(_ []byte, ptr filenodePtr) (int, filenodePtr, error) {
+       return 0, ptr, ErrInvalidOperation
+}
+
+func (fs *collectionFileSystem) Write(_ []byte, ptr filenodePtr) (int, filenodePtr, error) {
+       return 0, ptr, ErrInvalidOperation
+}
+
+func (fs *collectionFileSystem) Readdir() ([]os.FileInfo, error) {
+       return fs.rootnode().Readdir()
+}
+
+func (fs *collectionFileSystem) SetParent(parent inode, name string) {
+       fs.rootnode().SetParent(parent, name)
+}
+
+func (fs *collectionFileSystem) Truncate(int64) error {
+       return ErrInvalidOperation
+}
+
 func (fs *collectionFileSystem) Sync() error {
        if fs.uuid == "" {
                return nil
@@ -512,8 +568,6 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
                                seg.Truncate(len(cando))
                                fn.memsize += int64(len(cando))
                                fn.segments[cur] = seg
-                               cur++
-                               prev++
                        }
                }
 
@@ -1053,9 +1107,9 @@ func (dn *dirnode) loadManifest(txt string) error {
                                // situation might be rare anyway)
                                segIdx, pos = 0, 0
                        }
-                       for next := int64(0); segIdx < len(segments); segIdx++ {
+                       for ; segIdx < len(segments); segIdx++ {
                                seg := segments[segIdx]
-                               next = pos + int64(seg.Len())
+                               next := pos + int64(seg.Len())
                                if next <= offset || seg.Len() == 0 {
                                        pos = next
                                        continue
index f01369a885ece3b7c315c832b114caaf77715862..59a6a6ba825e57928e9348c17d971988fa24fc94 100644 (file)
@@ -7,7 +7,6 @@ package arvados
 import (
        "bytes"
        "crypto/md5"
-       "crypto/sha1"
        "errors"
        "fmt"
        "io"
@@ -33,6 +32,9 @@ type keepClientStub struct {
        blocks      map[string][]byte
        refreshable map[string]bool
        onPut       func(bufcopy []byte) // called from PutB, before acquiring lock
+       authToken   string               // client's auth token (used for signing locators)
+       sigkey      string               // blob signing key
+       sigttl      time.Duration        // blob signing ttl
        sync.RWMutex
 }
 
@@ -49,7 +51,7 @@ func (kcs *keepClientStub) ReadAt(locator string, p []byte, off int) (int, error
 }
 
 func (kcs *keepClientStub) PutB(p []byte) (string, int, error) {
-       locator := fmt.Sprintf("%x+%d+A12345@abcde", md5.Sum(p), len(p))
+       locator := SignLocator(fmt.Sprintf("%x+%d", md5.Sum(p), len(p)), kcs.authToken, time.Now().Add(kcs.sigttl), kcs.sigttl, []byte(kcs.sigkey))
        buf := make([]byte, len(p))
        copy(buf, p)
        if kcs.onPut != nil {
@@ -61,9 +63,12 @@ func (kcs *keepClientStub) PutB(p []byte) (string, int, error) {
        return locator, 1, nil
 }
 
-var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`)
+var reRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`)
 
 func (kcs *keepClientStub) LocalLocator(locator string) (string, error) {
+       if strings.Contains(locator, "+A") {
+               return locator, nil
+       }
        kcs.Lock()
        defer kcs.Unlock()
        if strings.Contains(locator, "+R") {
@@ -74,8 +79,9 @@ func (kcs *keepClientStub) LocalLocator(locator string) (string, error) {
                        return "", fmt.Errorf("kcs.refreshable[%q]==false", locator)
                }
        }
-       fakeSig := fmt.Sprintf("+A%x@%x", sha1.Sum(nil), time.Now().Add(time.Hour*24*14).Unix())
-       return localOrRemoteSignature.ReplaceAllLiteralString(locator, fakeSig), nil
+       locator = reRemoteSignature.ReplaceAllLiteralString(locator, "")
+       locator = SignLocator(locator, kcs.authToken, time.Now().Add(kcs.sigttl), kcs.sigttl, []byte(kcs.sigkey))
+       return locator, nil
 }
 
 type CollectionFSSuite struct {
@@ -92,7 +98,11 @@ func (s *CollectionFSSuite) SetUpTest(c *check.C) {
        s.kc = &keepClientStub{
                blocks: map[string][]byte{
                        "3858f62230ac3c915f300c664312c63f": []byte("foobar"),
-               }}
+               },
+               sigkey:    fixtureBlobSigningKey,
+               sigttl:    fixtureBlobSigningTTL,
+               authToken: fixtureActiveToken,
+       }
        s.fs, err = s.coll.FileSystem(s.client, s.kc)
        c.Assert(err, check.IsNil)
 }
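With these fields set, the stub returns properly signed locators instead of the old hard-coded +A12345@abcde suffix. A sketch of the signing call, assuming the package-level SignLocator used above (arguments: unsigned locator, API token, expiry, TTL, signing key):

package main

import (
        "crypto/md5"
        "fmt"
        "time"

        "git.arvados.org/arvados.git/sdk/go/arvados"
)

func main() {
        data := []byte("foobar")
        unsigned := fmt.Sprintf("%x+%d", md5.Sum(data), len(data))
        ttl := 336 * time.Hour // matches fixtureBlobSigningTTL in this commit
        // Illustrative token and key; real code uses the cluster's
        // blob signing key and the caller's API token.
        signed := arvados.SignLocator(unsigned, "exampletoken", time.Now().Add(ttl), ttl, []byte("examplekey"))
        fmt.Println(signed) // 3858f6...+6+A<signature>@<expiry>
}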
index 439eaec7c2a5dbde49f2fd2851551238a22166ec..254b90c812a337de96cb34da01b767dbe7adcc5a 100644 (file)
@@ -32,14 +32,14 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
                        log.Printf("BUG: unhandled error: %s", err)
                        return placeholder
                }
-               cfs, err := coll.FileSystem(fs, fs)
+               newfs, err := coll.FileSystem(fs, fs)
                if err != nil {
                        log.Printf("BUG: unhandled error: %s", err)
                        return placeholder
                }
-               root := cfs.rootnode()
-               root.SetParent(parent, coll.Name)
-               return root
+               cfs := newfs.(*collectionFileSystem)
+               cfs.SetParent(parent, coll.Name)
+               return cfs
        }}
 }
 
@@ -87,6 +87,19 @@ func (dn *deferrednode) Child(name string, replace func(inode) (inode, error)) (
        return dn.realinode().Child(name, replace)
 }
 
+// Sync is a no-op if the real inode hasn't even been created yet.
+func (dn *deferrednode) Sync() error {
+       dn.mtx.Lock()
+       defer dn.mtx.Unlock()
+       if !dn.created {
+               return nil
+       } else if syncer, ok := dn.wrapped.(syncer); ok {
+               return syncer.Sync()
+       } else {
+               return ErrInvalidOperation
+       }
+}
+
 func (dn *deferrednode) Truncate(size int64) error       { return dn.realinode().Truncate(size) }
 func (dn *deferrednode) SetParent(p inode, name string)  { dn.realinode().SetParent(p, name) }
 func (dn *deferrednode) IsDir() bool                     { return dn.currentinode().IsDir() }
index 42322a14a9adda155c75f225ef43e2cdfd96615c..56b5953234784424e51676a90b4c148661cb8c4d 100644 (file)
@@ -15,7 +15,7 @@ import (
 //
 // See (*customFileSystem)MountUsers for example usage.
 type lookupnode struct {
-       inode
+       treenode
        loadOne func(parent inode, name string) (inode, error)
        loadAll func(parent inode) ([]inode, error)
        stale   func(time.Time) bool
@@ -26,6 +26,20 @@ type lookupnode struct {
        staleOne  map[string]time.Time
 }
 
+// Sync flushes pending writes for loaded children and, if successful,
+// triggers a reload on next lookup.
+func (ln *lookupnode) Sync() error {
+       err := ln.treenode.Sync()
+       if err != nil {
+               return err
+       }
+       ln.staleLock.Lock()
+       ln.staleAll = time.Time{}
+       ln.staleOne = nil
+       ln.staleLock.Unlock()
+       return nil
+}
+
 func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
        ln.staleLock.Lock()
        defer ln.staleLock.Unlock()
@@ -36,7 +50,7 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
                        return nil, err
                }
                for _, child := range all {
-                       _, err = ln.inode.Child(child.FileInfo().Name(), func(inode) (inode, error) {
+                       _, err = ln.treenode.Child(child.FileInfo().Name(), func(inode) (inode, error) {
                                return child, nil
                        })
                        if err != nil {
@@ -49,25 +63,47 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) {
                // newer than ln.staleAll. Reclaim memory.
                ln.staleOne = nil
        }
-       return ln.inode.Readdir()
+       return ln.treenode.Readdir()
 }
 
+// Child rejects (with ErrInvalidArgument) calls to add/replace
+// children, instead calling loadOne when a non-existing child is
+// looked up.
 func (ln *lookupnode) Child(name string, replace func(inode) (inode, error)) (inode, error) {
        ln.staleLock.Lock()
        defer ln.staleLock.Unlock()
        checkTime := time.Now()
+       var existing inode
+       var err error
        if ln.stale(ln.staleAll) && ln.stale(ln.staleOne[name]) {
-               _, err := ln.inode.Child(name, func(inode) (inode, error) {
+               existing, err = ln.treenode.Child(name, func(inode) (inode, error) {
                        return ln.loadOne(ln, name)
                })
-               if err != nil {
-                       return nil, err
+               if err == nil && existing != nil {
+                       if ln.staleOne == nil {
+                               ln.staleOne = map[string]time.Time{name: checkTime}
+                       } else {
+                               ln.staleOne[name] = checkTime
+                       }
                }
-               if ln.staleOne == nil {
-                       ln.staleOne = map[string]time.Time{name: checkTime}
-               } else {
-                       ln.staleOne[name] = checkTime
+       } else {
+               existing, err = ln.treenode.Child(name, nil)
+               if err != nil && !os.IsNotExist(err) {
+                       return existing, err
+               }
+       }
+       if replace != nil {
+               // Let the callback try to delete or replace the
+               // existing node; if it does, return
+               // ErrInvalidArgument.
+               if tryRepl, err := replace(existing); err != nil {
+                       // Propagate error from callback
+                       return existing, err
+               } else if tryRepl != existing {
+                       return existing, ErrInvalidArgument
                }
        }
-       return ln.inode.Child(name, replace)
+       // Return original error from ln.treenode.Child() (it might be
+       // ErrNotExist).
+       return existing, err
 }
index c5eb03360a3877b577168611a8e579329b6abfa8..bf6391a74e4455ea076f82ac49173cb36e825a48 100644 (file)
@@ -6,7 +6,6 @@ package arvados
 
 import (
        "log"
-       "os"
        "strings"
 )
 
@@ -57,7 +56,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
                // both "/" and the substitution string.
        }
        if len(contents.Items) == 0 {
-               return nil, os.ErrNotExist
+               return nil, nil
        }
        coll := contents.Items[0]
 
index 61d82c7fa9f4e442d6492ba8fc0f285df76bd5f2..86facd681e5aa336ed6c73252ecc9c3936c9502e 100644 (file)
@@ -200,6 +200,23 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) {
        err = wf.Close()
        c.Check(err, check.IsNil)
 
+       err = project.Sync()
+       c.Check(err, check.IsNil)
+       _, err = s.fs.Open("/home/A Project/oob/test.txt")
+       c.Check(err, check.IsNil)
+
+       // Sync again to mark the project dir as stale, so the
+       // collection gets reloaded from the controller on next
+       // lookup.
+       err = project.Sync()
+       c.Check(err, check.IsNil)
+
+       // Ensure collection was flushed by Sync
+       var latest Collection
+       err = s.client.RequestAndDecode(&latest, "GET", "arvados/v1/collections/"+oob.UUID, nil, nil)
+       c.Check(err, check.IsNil)
+       c.Check(latest.ManifestText, check.Matches, `.*:test.txt.*\n`)
+
        // Delete test.txt behind s.fs's back by updating the
        // collection record with an empty ManifestText.
        err = s.client.RequestAndDecode(nil, "PATCH", "arvados/v1/collections/"+oob.UUID, nil, map[string]interface{}{
@@ -210,8 +227,6 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) {
        })
        c.Assert(err, check.IsNil)
 
-       err = project.Sync()
-       c.Check(err, check.IsNil)
        _, err = s.fs.Open("/home/A Project/oob/test.txt")
        c.Check(err, check.NotNil)
        _, err = s.fs.Open("/home/A Project/oob")
@@ -221,7 +236,27 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) {
        c.Assert(err, check.IsNil)
 
        err = project.Sync()
-       c.Check(err, check.IsNil)
+       c.Check(err, check.NotNil) // can't update the deleted collection
        _, err = s.fs.Open("/home/A Project/oob")
-       c.Check(err, check.NotNil)
+       c.Check(err, check.IsNil) // parent dir still has old collection -- didn't reload, because Sync failed
+}
+
+func (s *SiteFSSuite) TestProjectUnsupportedOperations(c *check.C) {
+       s.fs.MountByID("by_id")
+       s.fs.MountProject("home", "")
+
+       _, err := s.fs.OpenFile("/home/A Project/newfilename", os.O_CREATE|os.O_RDWR, 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       err = s.fs.Mkdir("/home/A Project/newdirname", 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       err = s.fs.Mkdir("/by_id/newdirname", 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       err = s.fs.Mkdir("/by_id/"+fixtureAProjectUUID+"/newdirname", 0)
+       c.Check(err, check.ErrorMatches, "invalid argument")
+
+       _, err = s.fs.OpenFile("/home/A Project", 0, 0)
+       c.Check(err, check.IsNil)
 }
index 7826d335c81fa93cfe54bf39a81a66335a65d336..900893aa36420e7c9d2008fff31b36d4bd03e0bf 100644 (file)
@@ -40,7 +40,7 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem {
                        thr:       newThrottle(concurrentWriters),
                },
        }
-       root.inode = &treenode{
+       root.treenode = treenode{
                fs:     fs,
                parent: root,
                fileinfo: fileinfo{
@@ -54,9 +54,9 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem {
 }
 
 func (fs *customFileSystem) MountByID(mount string) {
-       fs.root.inode.Child(mount, func(inode) (inode, error) {
+       fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return &vdirnode{
-                       inode: &treenode{
+                       treenode: treenode{
                                fs:     fs,
                                parent: fs.root,
                                inodes: make(map[string]inode),
@@ -72,18 +72,18 @@ func (fs *customFileSystem) MountByID(mount string) {
 }
 
 func (fs *customFileSystem) MountProject(mount, uuid string) {
-       fs.root.inode.Child(mount, func(inode) (inode, error) {
+       fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return fs.newProjectNode(fs.root, mount, uuid), nil
        })
 }
 
 func (fs *customFileSystem) MountUsers(mount string) {
-       fs.root.inode.Child(mount, func(inode) (inode, error) {
+       fs.root.treenode.Child(mount, func(inode) (inode, error) {
                return &lookupnode{
                        stale:   fs.Stale,
                        loadOne: fs.usersLoadOne,
                        loadAll: fs.usersLoadAll,
-                       inode: &treenode{
+                       treenode: treenode{
                                fs:     fs,
                                parent: fs.root,
                                inodes: make(map[string]inode),
@@ -115,10 +115,7 @@ func (c *Client) SiteFileSystem(kc keepClient) CustomFileSystem {
 }
 
 func (fs *customFileSystem) Sync() error {
-       fs.staleLock.Lock()
-       defer fs.staleLock.Unlock()
-       fs.staleThreshold = time.Now()
-       return nil
+       return fs.root.Sync()
 }
 
 // Stale returns true if information obtained at time t should be
@@ -130,7 +127,7 @@ func (fs *customFileSystem) Stale(t time.Time) bool {
 }
 
 func (fs *customFileSystem) newNode(name string, perm os.FileMode, modTime time.Time) (node inode, err error) {
-       return nil, ErrInvalidOperation
+       return nil, ErrInvalidArgument
 }
 
 func (fs *customFileSystem) mountByID(parent inode, id string) inode {
@@ -149,13 +146,13 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
        if err != nil {
                return nil
        }
-       cfs, err := coll.FileSystem(fs, fs)
+       newfs, err := coll.FileSystem(fs, fs)
        if err != nil {
                return nil
        }
-       root := cfs.rootnode()
-       root.SetParent(parent, id)
-       return root
+       cfs := newfs.(*collectionFileSystem)
+       cfs.SetParent(parent, id)
+       return cfs
 }
 
 func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode {
@@ -163,7 +160,7 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
                stale:   fs.Stale,
                loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
                loadAll: func(parent inode) ([]inode, error) { return fs.projectsLoadAll(parent, uuid) },
-               inode: &treenode{
+               treenode: treenode{
                        fs:     fs,
                        parent: root,
                        inodes: make(map[string]inode),
@@ -176,24 +173,24 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
        }
 }
 
-// vdirnode wraps an inode by ignoring any requests to add/replace
-// children, and calling a create() func when a non-existing child is
-// looked up.
+// vdirnode wraps an inode by rejecting (with ErrInvalidArgument)
+// calls that add/replace children directly, instead calling a
+// create() func when a non-existing child is looked up.
 //
 // create() can return either a new node, which will be added to the
 // treenode, or nil for ENOENT.
 type vdirnode struct {
-       inode
+       treenode
        create func(parent inode, name string) inode
 }
 
 func (vn *vdirnode) Child(name string, replace func(inode) (inode, error)) (inode, error) {
-       return vn.inode.Child(name, func(existing inode) (inode, error) {
+       return vn.treenode.Child(name, func(existing inode) (inode, error) {
                if existing == nil && vn.create != nil {
                        existing = vn.create(vn, name)
                        if existing != nil {
                                existing.SetParent(vn, name)
-                               vn.inode.(*treenode).fileinfo.modTime = time.Now()
+                               vn.treenode.fileinfo.modTime = time.Now()
                        }
                }
                if replace == nil {
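With customFileSystem.Sync now delegating to the root node (above), one Sync call flushes every modified collection mounted anywhere in the site filesystem and marks loaded directories stale so the next lookup reloads them. A hedged usage fragment (c is an *arvados.Client and kc a keep client, as in the tests in this commit):

fs := c.SiteFileSystem(kc)
f, err := fs.OpenFile("/home/A Project/oob/test.txt", os.O_CREATE|os.O_RDWR, 0777)
if err != nil {
        log.Fatal(err)
}
f.Write([]byte("hello"))
f.Close()
// Flush all pending collection writes and trigger reload on the
// next lookup.
if err := fs.Sync(); err != nil {
        log.Fatal(err)
}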
index 80cc03df37b88ad82ad246db6d4d7bce68dd68a2..778b12015a6f3964be7db301f30cd8ca5db1a971 100644 (file)
@@ -7,6 +7,7 @@ package arvados
 import (
        "net/http"
        "os"
+       "time"
 
        check "gopkg.in/check.v1"
 )
@@ -22,6 +23,8 @@ const (
        fixtureFooCollectionPDH        = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
        fixtureFooCollection           = "zzzzz-4zz18-fy296fx3hot09f7"
        fixtureNonexistentCollection   = "zzzzz-4zz18-totallynotexist"
+       fixtureBlobSigningKey          = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
+       fixtureBlobSigningTTL          = 336 * time.Hour
 )
 
 var _ = check.Suite(&SiteFSSuite{})
@@ -41,7 +44,11 @@ func (s *SiteFSSuite) SetUpTest(c *check.C) {
        s.kc = &keepClientStub{
                blocks: map[string][]byte{
                        "3858f62230ac3c915f300c664312c63f": []byte("foobar"),
-               }}
+               },
+               sigkey:    fixtureBlobSigningKey,
+               sigttl:    fixtureBlobSigningTTL,
+               authToken: fixtureActiveToken,
+       }
        s.fs = s.client.SiteFileSystem(s.kc)
 }
 
@@ -98,7 +105,7 @@ func (s *SiteFSSuite) TestByUUIDAndPDH(c *check.C) {
        c.Check(names, check.DeepEquals, []string{"baz"})
 
        _, err = s.fs.OpenFile("/by_id/"+fixtureNonexistentCollection, os.O_RDWR|os.O_CREATE, 0755)
-       c.Check(err, check.Equals, ErrInvalidOperation)
+       c.Check(err, check.Equals, ErrInvalidArgument)
        err = s.fs.Rename("/by_id/"+fixtureFooCollection, "/by_id/beep")
        c.Check(err, check.Equals, ErrInvalidArgument)
        err = s.fs.Rename("/by_id/"+fixtureFooCollection+"/foo", "/by_id/beep")
index f4b0a994366db603ba0284cb76f4a6573ff266c7..2acf3e59ab81ae10ff816577f5f33fdaea8b9922 100644 (file)
@@ -157,7 +157,6 @@ func (s *AggregatorSuite) setAllServiceURLs(listen string) {
                &svcs.Keepproxy,
                &svcs.Keepstore,
                &svcs.Health,
-               &svcs.Nodemanager,
                &svcs.RailsAPI,
                &svcs.WebDAV,
                &svcs.Websocket,
index a1801b21456b9a6d8bbb716f4db19eaa78feaa4a..2604b02b17aaeb412b2519e4c09a69264fa8d340 100644 (file)
@@ -535,6 +535,7 @@ func (s *StandaloneSuite) TestGetEmptyBlock(c *C) {
        defer ks.listener.Close()
 
        arv, err := arvadosclient.MakeArvadosClient()
+       c.Check(err, IsNil)
        kc, _ := MakeKeepClient(arv)
        arv.ApiToken = "abc123"
        kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
diff --git a/sdk/pam/.dockerignore b/sdk/pam/.dockerignore
deleted file mode 100644 (file)
index 922b80e..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-*~
-*.pyc
-.eggs
-*.egg_info
-build
-tmp
diff --git a/sdk/pam/.gitignore b/sdk/pam/.gitignore
deleted file mode 120000 (symlink)
index 1399fd4..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../python/.gitignore
\ No newline at end of file
diff --git a/sdk/pam/Dockerfile b/sdk/pam/Dockerfile
deleted file mode 100644 (file)
index ff450d8..0000000
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-# These tests assume you have a real API server running on the docker host.
-#
-# Build the test container:
-#   First, replace 3000 below with your api server's port number if necessary.
-#   host$ python setup.py sdist rotate --keep=1 --match .tar.gz
-#   host$ docker build --tag=arvados/pam_test .
-#
-# Automated integration test:
-#   host$ docker run -it --add-host zzzzz.arvadosapi.com:"$(hostname -I |awk '{print $1}')" arvados/pam_test
-# You should see "=== OK ===", followed by a Perl stack trace due to a
-# yet-unidentified pam_python.so bug.
-#
-# Manual integration test:
-#   host$ docker run -it --add-host zzzzz.arvadosapi.com:"$(hostname -I |awk '{print $1}')" arvados/pam_test bash -c 'rsyslogd & tail -F /var/log/auth.log & sleep 1 & bash'
-#   container# login
-#   login: active
-#   Arvados API token: 3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi
-# You should now be logged in to the "active" shell account in the
-# container. You should also see arvados_pam log entries in
-# /var/log/auth.log (and in your terminal, thanks to "tail -F").
-
-FROM debian:wheezy
-RUN apt-get update
-RUN apt-get -qy dist-upgrade
-RUN apt-get -qy install python python-virtualenv libpam-python rsyslog
-# Packages required by pycurl, ciso8601
-RUN apt-get -qy install libcurl4-gnutls-dev python2.7-dev
-
-# for jessie (which also has other snags)
-# RUN apt-get -qy install python-pip libgnutls28-dev
-
-RUN pip install --upgrade setuptools
-RUN pip install python-pam
-ADD dist /dist
-RUN pip install /dist/arvados-pam-*.tar.gz
-
-# Configure and enable the module (hopefully vendor packages will offer a neater way)
-RUN perl -pi -e 's{api.example}{zzzzz.arvadosapi.com:3000}; s{shell\.example}{testvm2.shell insecure};' /usr/share/pam-configs/arvados
-RUN DEBIAN_FRONTEND=noninteractive pam-auth-update arvados --remove unix
-
-# Add a user account matching the fixture
-RUN useradd -ms /bin/bash active
-
-# Test with python (SIGSEGV during tests)
-#ADD . /pam
-#WORKDIR /pam
-#CMD rsyslogd & tail -F /var/log/auth.log & python setup.py test --test-suite integration_tests
-
-# Test with perl (SIGSEGV when program exits)
-RUN apt-get install -qy libauthen-pam-perl
-ADD tests/integration_test.pl /integration_test.pl
-CMD rsyslogd & tail -F /var/log/auth.log & sleep 1 && /integration_test.pl
diff --git a/sdk/pam/LICENSE-2.0.txt b/sdk/pam/LICENSE-2.0.txt
deleted file mode 100644 (file)
index d645695..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-   [202 lines of standard Apache License 2.0 text, deleted along with the file]
deleted file mode 100644 (file)
index 48892fa..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-include LICENSE-2.0.txt
-include README.rst
-include examples/shellinabox
-include lib/libpam_arvados.py
-include pam-configs/arvados
-include arvados_version.py
\ No newline at end of file
diff --git a/sdk/pam/README.rst b/sdk/pam/README.rst
deleted file mode 100644 (file)
index 81be331..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-.. Copyright (C) The Arvados Authors. All rights reserved.
-..
-.. SPDX-License-Identifier: Apache-2.0
-
-==================
-Arvados PAM Module
-==================
-
-Overview
---------
-
-Accept Arvados API tokens to authenticate to shell accounts.
-
-.. _Arvados: https://arvados.org
-
-Installation
-------------
-
-See http://doc.arvados.org
-
-Testing and Development
------------------------
-
-https://arvados.org/projects/arvados/wiki/Hacking
-describes how to set up a development environment and run tests.
diff --git a/sdk/pam/arvados_pam/__init__.py b/sdk/pam/arvados_pam/__init__.py
deleted file mode 100644 (file)
index dd78d41..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import sys
-sys.argv=['']
-
-from . import auth_event
-
-def pam_sm_authenticate(pamh, flags, argv):
-    config = {}
-    config['arvados_api_host'] = argv[1]
-    config['virtual_machine_hostname'] = argv[2]
-    if len(argv) > 3:
-        for k in argv[3:]:
-            config[k] = True
-
-    try:
-        username = pamh.get_user(None)
-    except pamh.exception as e:
-        return e.pam_result
-
-    if not username:
-        return pamh.PAM_USER_UNKNOWN
-
-    try:
-        prompt = '' if config.get('noprompt') else 'Arvados API token: '
-        token = pamh.conversation(pamh.Message(pamh.PAM_PROMPT_ECHO_OFF, prompt)).resp
-    except pamh.exception as e:
-        return e.pam_result
-
-    if auth_event.AuthEvent(
-            config=config,
-            service=pamh.service,
-            client_host=pamh.rhost,
-            username=username,
-            token=token).can_login():
-        return pamh.PAM_SUCCESS
-    else:
-        return pamh.PAM_AUTH_ERR
-
-def pam_sm_setcred(pamh, flags, argv):
-    return pamh.PAM_SUCCESS
-
-def pam_sm_acct_mgmt(pamh, flags, argv):
-    return pamh.PAM_SUCCESS
-
-def pam_sm_open_session(pamh, flags, argv):
-    return pamh.PAM_SUCCESS
-
-def pam_sm_close_session(pamh, flags, argv):
-    return pamh.PAM_SUCCESS
-
-def pam_sm_chauthtok(pamh, flags, argv):
-    return pamh.PAM_SUCCESS
diff --git a/sdk/pam/arvados_pam/auth_event.py b/sdk/pam/arvados_pam/auth_event.py
deleted file mode 100644 (file)
index 4f2663c..0000000
+++ /dev/null
@@ -1,92 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados
-import syslog
-
-def auth_log(msg):
-    """Log an authentication result to syslogd"""
-    syslog.openlog(facility=syslog.LOG_AUTH)
-    syslog.syslog('arvados_pam: ' + msg)
-    syslog.closelog()
-
-class AuthEvent(object):
-    def __init__(self, config, service, client_host, username, token):
-        self.config = config
-        self.service = service
-        self.client_host = client_host
-        self.username = username
-        self.token = token
-
-        self.api_host = None
-        self.vm_uuid = None
-        self.user = None
-
-    def can_login(self):
-        """Return truthy IFF credentials should be accepted."""
-        ok = False
-        try:
-            self.api_host = self.config['arvados_api_host']
-            self.arv = arvados.api('v1', host=self.api_host, token=self.token,
-                                   insecure=self.config.get('insecure', False),
-                                   cache=False)
-
-            vmname = self.config['virtual_machine_hostname']
-            vms = self.arv.virtual_machines().list(filters=[['hostname','=',vmname]]).execute()
-            if vms['items_available'] > 1:
-                raise Exception("lookup hostname %s returned %d records" % (vmname, vms['items_available']))
-            if vms['items_available'] == 0:
-                raise Exception("lookup hostname %s not found" % vmname)
-            vm = vms['items'][0]
-            if vm['hostname'] != vmname:
-                raise Exception("lookup hostname %s returned hostname %s" % (vmname, vm['hostname']))
-            self.vm_uuid = vm['uuid']
-
-            self.user = self.arv.users().current().execute()
-
-            filters = [
-                ['link_class','=','permission'],
-                ['name','=','can_login'],
-                ['head_uuid','=',self.vm_uuid],
-                ['tail_uuid','=',self.user['uuid']]]
-            for l in self.arv.links().list(filters=filters, limit=10000).execute()['items']:
-                if (l['properties']['username'] == self.username and
-                    l['tail_uuid'] == self.user['uuid'] and
-                    l['head_uuid'] == self.vm_uuid and
-                    l['link_class'] == 'permission' and
-                    l['name'] == 'can_login'):
-                    return self._report(True)
-
-            return self._report(False)
-
-        except Exception as e:
-            return self._report(e)
-
-    def _report(self, result):
-        """Log the result. Return truthy IFF result is True.
-
-        result must be True, False, or an exception.
-        """
-        self.result = result
-        auth_log(self.message())
-        return result == True
-
-    def message(self):
-        """Return a log message describing the event and its outcome."""
-        if isinstance(self.result, Exception):
-            outcome = 'Error: ' + repr(self.result)
-        elif self.result == True:
-            outcome = 'Allow'
-        else:
-            outcome = 'Deny'
-
-        if len(self.token) > 40:
-            log_token = self.token[0:15]
-        else:
-            log_token = '<invalid>'
-
-        log_label = [self.service, self.api_host, self.vm_uuid, self.client_host, self.username, log_token]
-        if self.user:
-            log_label += [self.user.get('uuid'), self.user.get('full_name')]
-        return str(log_label) + ': ' + outcome
diff --git a/sdk/pam/arvados_version.py b/sdk/pam/arvados_version.py
deleted file mode 100644 (file)
index 9aabff4..0000000
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import subprocess
-import time
-import os
-import re
-
-def git_version_at_commit():
-    curdir = os.path.dirname(os.path.abspath(__file__))
-    myhash = subprocess.check_output(['git', 'log', '-n1', '--first-parent',
-                                       '--format=%H', curdir]).strip()
-    myversion = subprocess.check_output([curdir+'/../../build/version-at-commit.sh', myhash]).strip().decode()
-    return myversion
-
-def save_version(setup_dir, module, v):
-  with open(os.path.join(setup_dir, module, "_version.py"), 'wt') as fp:
-      return fp.write("__version__ = '%s'\n" % v)
-
-def read_version(setup_dir, module):
-  with open(os.path.join(setup_dir, module, "_version.py"), 'rt') as fp:
-      return re.match("__version__ = '(.*)'$", fp.read()).groups()[0]
-
-def get_version(setup_dir, module):
-    env_version = os.environ.get("ARVADOS_BUILDING_VERSION")
-
-    if env_version:
-        save_version(setup_dir, module, env_version)
-    else:
-        try:
-            save_version(setup_dir, module, git_version_at_commit())
-        except (subprocess.CalledProcessError, OSError):
-            pass
-
-    return read_version(setup_dir, module)
diff --git a/sdk/pam/examples/shellinabox b/sdk/pam/examples/shellinabox
deleted file mode 100644 (file)
index 2d91ccb..0000000
+++ /dev/null
@@ -1,27 +0,0 @@
-# This example is a stock debian "login" file with libpam_arvados
-# replacing pam_unix, and the "noprompt" option in use. It can be
-# installed as /etc/pam.d/shellinabox .
-
-auth       optional   pam_faildelay.so  delay=3000000
-auth [success=ok new_authtok_reqd=ok ignore=ignore user_unknown=bad default=die] pam_securetty.so
-auth       requisite  pam_nologin.so
-session [success=ok ignore=ignore module_unknown=ignore default=bad] pam_selinux.so close
-session       required   pam_env.so readenv=1
-session       required   pam_env.so readenv=1 envfile=/etc/default/locale
-
-auth [success=1 default=ignore] pam_python.so /usr/local/lib/security/libpam_arvados.py api.example shell.example noprompt
-auth   requisite                       pam_deny.so
-auth   required                        pam_permit.so
-
-auth       optional   pam_group.so
-session    required   pam_limits.so
-session    optional   pam_lastlog.so
-session    optional   pam_motd.so  motd=/run/motd.dynamic
-session    optional   pam_motd.so
-session    optional   pam_mail.so standard
-
-@include common-account
-@include common-session
-@include common-password
-
-session [success=ok ignore=ignore module_unknown=ignore default=bad] pam_selinux.so open
diff --git a/sdk/pam/fpm-info.sh b/sdk/pam/fpm-info.sh
deleted file mode 100644 (file)
index 6c323f5..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-case "$TARGET" in
-    debian* | ubuntu*)
-        fpm_depends+=('libpam-python' 'libcurl3-gnutls')
-        ;;
-    centos*)
-        fpm_depends+=('python-pam')
-        ;;
-    *)
-        echo >&2 "ERROR: $PACKAGE: pam_python.so dependency unavailable in $TARGET."
-        return 1
-        ;;
-esac
-
-case "$FORMAT" in
-    deb)
-        fpm_args+=('--deb-recommends=system-log-daemon')
-        ;;
-esac
diff --git a/sdk/pam/gittaggers.py b/sdk/pam/gittaggers.py
deleted file mode 120000 (symlink)
index d59c02c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../python/gittaggers.py
\ No newline at end of file
diff --git a/sdk/pam/integration_tests/__init__.py b/sdk/pam/integration_tests/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/sdk/pam/integration_tests/test_pam.py b/sdk/pam/integration_tests/test_pam.py
deleted file mode 100644 (file)
index 32ae38d..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-"""These tests assume we are running (in a docker container) with
-arvados_pam configured and a test API server running.
-"""
-import pam
-import unittest
-
-# From services/api/test/fixtures/api_client_authorizations.yml
-# because that file is not available during integration tests:
-ACTIVE_TOKEN = '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'
-SPECTATOR_TOKEN = 'zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu'
-
-class IntegrationTest(unittest.TestCase):
-    def setUp(self):
-        self.p = pam.pam()
-
-    def test_allow(self):
-        self.assertTrue(self.p.authenticate('active', ACTIVE_TOKEN, service='login'))
-
-    def test_deny_bad_token(self):
-        self.assertFalse(self.p.authenticate('active', 'thisisaverybadtoken', service='login'))
-
-    def test_deny_empty_token(self):
-        self.assertFalse(self.p.authenticate('active', '', service='login'))
-
-    def test_deny_permission(self):
-        self.assertFalse(self.p.authenticate('spectator', SPECTATOR_TOKEN, service='login'))
diff --git a/sdk/pam/lib/libpam_arvados.py b/sdk/pam/lib/libpam_arvados.py
deleted file mode 100644 (file)
index 7c3406d..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import sys
-sys.path.append('/usr/share/python2.7/dist/libpam-arvados/lib/python2.7/site-packages')
-from arvados_pam import *
diff --git a/sdk/pam/pam-configs/arvados b/sdk/pam/pam-configs/arvados
deleted file mode 100644 (file)
index 086e176..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# 1. Change "api.example" to your ARVADOS_API_HOST
-# 2. Change "shell.example" to this host's hostname
-#    (as it appears in the Arvados virtual_machines list)
-# 3. Install in /usr/share/pam-configs/arvados
-# 4. Run `pam-auth-update arvados`
-
-Name: Arvados authentication
-Default: yes
-Priority: 256
-Auth-Type: Primary
-Auth:
-       [success=end default=ignore]    pam_python.so /usr/local/lib/security/libpam_arvados.py api.example shell.example
-Auth-Initial:
-       [success=end default=ignore]    pam_python.so /usr/local/lib/security/libpam_arvados.py api.example shell.example
diff --git a/sdk/pam/setup.py b/sdk/pam/setup.py
deleted file mode 100755 (executable)
index 59b49a1..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-from __future__ import absolute_import
-import glob
-import os
-import sys
-import re
-import subprocess
-
-from setuptools import setup, find_packages
-
-SETUP_DIR = os.path.dirname(__file__) or '.'
-README = os.path.join(SETUP_DIR, 'README.rst')
-
-import arvados_version
-version = arvados_version.get_version(SETUP_DIR, "arvados_pam")
-if os.environ.get('ARVADOS_BUILDING_VERSION', False):
-    pysdk_dep = "=={}".format(version)
-else:
-    # On dev releases, arvados-python-client may have a different timestamp
-    pysdk_dep = "<={}".format(version)
-
-short_tests_only = False
-if '--short-tests-only' in sys.argv:
-    short_tests_only = True
-    sys.argv.remove('--short-tests-only')
-
-setup(name='arvados-pam',
-      version=version,
-      description='Arvados PAM module',
-      long_description=open(README).read(),
-      author='Arvados',
-      author_email='info@arvados.org',
-      url='https://arvados.org',
-      download_url='https://github.com/arvados/arvados.git',
-      license='Apache 2.0',
-      packages=[
-          'arvados_pam',
-      ],
-      scripts=[
-      ],
-      data_files=[
-          ('lib/security', ['lib/libpam_arvados.py']),
-          ('share/pam-configs', ['pam-configs/arvados']),
-          ('share/doc/arvados-pam', ['LICENSE-2.0.txt', 'README.rst']),
-          ('share/doc/arvados-pam/examples', glob.glob('examples/*')),
-      ],
-      install_requires=[
-          'arvados-python-client{}'.format(pysdk_dep),
-      ],
-      test_suite='tests',
-      tests_require=['pbr<1.7.0', 'mock>=1.0', 'python-pam'],
-      zip_safe=False,
-)
diff --git a/sdk/pam/tests/__init__.py b/sdk/pam/tests/__init__.py
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/sdk/pam/tests/integration_test.pl b/sdk/pam/tests/integration_test.pl
deleted file mode 100755 (executable)
index cbe9b0a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env perl
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-$ENV{ARVADOS_API_HOST_INSECURE} = 1;
-use Authen::PAM qw(:constants);
-
-for my $case (['good', 1, 'active', '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'],
-              ['badtoken', 0, 'active', 'badtokenmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'],
-              ['badusername', 0, 'baduser', '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi']) {
-    dotest(@$case);
-}
-print "=== OK ===\n";
-
-sub dotest {
-    my ($label, $expect_ok, $user, $token) = @_;
-    print "$label: ";
-    my $service_name = 'login';
-    $main::Token = $token;
-    my $pamh = new Authen::PAM($service_name, $user, \&token_conv_func);
-    ref($pamh) || die "Error code $pamh during PAM init!";
-    $pamh->pam_set_item(PAM_RHOST(), '::1');
-    $pamh->pam_set_item(PAM_RUSER(), 'none');
-    $pamh->pam_set_item(PAM_TTY(), '/dev/null');
-    my $flags = PAM_SILENT();
-    $res = $pamh->pam_authenticate($flags);
-    $msg = $pamh->pam_strerror($res);
-    print "Result (code $res): $msg\n";
-    if (($res == 0) != ($expect_ok == 1)) {
-        die "*** FAIL ***\n";
-    }
-}
-
-sub token_conv_func {
-    my @res;
-    while ( @_ ) {
-        my $code = shift;
-        my $msg = shift;
-        my $ans;
-        print "Message (type $code): $msg\n";
-        if ($code == PAM_PROMPT_ECHO_OFF() || $code == PAM_PROMPT_ECHO_ON()) {
-            $ans = $main::Token;
-        }
-        push @res, (0,$ans);
-    }
-    push @res, PAM_SUCCESS();
-    return @res;
-}
diff --git a/sdk/pam/tests/mocker.py b/sdk/pam/tests/mocker.py
deleted file mode 100644 (file)
index ec6f064..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import mock
-import unittest
-
-class Mocker(unittest.TestCase):
-    ACTIVE_TOKEN = '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'
-
-    default_config = {
-        'arvados_api_host': 'zzzzz.api_host.example',
-        'virtual_machine_hostname': 'testvm2.shell',
-    }
-    default_request = {
-        'client_host': '::1',
-        'token': ACTIVE_TOKEN,
-        'username': 'active',
-    }
-    default_response = {
-        'links': {
-            'items': [{
-                'uuid': 'zzzzz-o0j2j-rah2ya1ohx9xaev',
-                'tail_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-                'head_uuid': 'zzzzz-2x53u-382brsig8rp3065',
-                'link_class': 'permission',
-                'name': 'can_login',
-                'properties': {
-                    'username': 'active',
-                },
-            }],
-        },
-        'users': {
-            'uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-            'full_name': 'Active User',
-        },
-        'virtual_machines': {
-            'items': [{
-                'uuid': 'zzzzz-2x53u-382brsig8rp3065',
-                'hostname': 'testvm2.shell',
-            }],
-            'items_available': 1,
-        },
-    }
-
-    def setUp(self):
-        self.config = self.default_config.copy()
-        self.request = self.default_request.copy()
-        self.response = self.default_response.copy()
-        self.api_client = mock.MagicMock(name='api_client')
-        self.api_client.users().current().execute.side_effect = lambda: self.response['users']
-        self.api_client.virtual_machines().list().execute.side_effect = lambda: self.response['virtual_machines']
-        self.api_client.links().list().execute.side_effect = lambda: self.response['links']
-        patcher = mock.patch('arvados.api')
-        self.api = patcher.start()
-        self.addCleanup(patcher.stop)
-        self.api.side_effect = [self.api_client]
-
-        self.syslogged = []
-        patcher = mock.patch('syslog.syslog')
-        self.syslog = patcher.start()
-        self.addCleanup(patcher.stop)
-        self.syslog.side_effect = lambda s: self.syslogged.append(s)
diff --git a/sdk/pam/tests/test_auth_event.py b/sdk/pam/tests/test_auth_event.py
deleted file mode 100644 (file)
index f907b31..0000000
+++ /dev/null
@@ -1,99 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados_pam
-import re
-from . import mocker
-
-class AuthEventTest(mocker.Mocker):
-    def attempt(self):
-        return arvados_pam.auth_event.AuthEvent(config=self.config, service='test_service', **self.request).can_login()
-
-    def test_success(self):
-        self.assertTrue(self.attempt())
-
-        self.api_client.virtual_machines().list.assert_called_with(
-            filters=[['hostname','=',self.config['virtual_machine_hostname']]])
-        self.api.assert_called_with(
-            'v1',
-            host=self.config['arvados_api_host'], token=self.request['token'],
-            insecure=False,
-            cache=False)
-        self.assertEqual(1, len(self.syslogged))
-        for i in ['test_service',
-                  self.request['username'],
-                  self.config['arvados_api_host'],
-                  self.response['virtual_machines']['items'][0]['uuid']]:
-            self.assertRegexpMatches(self.syslogged[0], re.escape(i))
-        self.assertRegexpMatches(self.syslogged[0], re.escape(self.request['token'][0:15]), 'token prefix not logged')
-        self.assertNotRegexpMatches(self.syslogged[0], re.escape(self.request['token'][15:30]), 'too much token logged')
-
-    def test_fail_vm_lookup(self):
-        self.api_client.virtual_machines().list().execute.side_effect = Exception("Test-induced failure")
-        self.assertFalse(self.attempt())
-        self.assertRegexpMatches(self.syslogged[0], 'Test-induced failure')
-
-    def test_vm_hostname_not_found(self):
-        self.response['virtual_machines'] = {
-            'items': [],
-            'items_available': 0,
-        }
-        self.assertFalse(self.attempt())
-
-    def test_vm_hostname_ambiguous(self):
-        self.response['virtual_machines'] = {
-            'items': [
-                {
-                    'uuid': 'zzzzz-2x53u-382brsig8rp3065',
-                    'hostname': 'testvm2.shell',
-                },
-                {
-                    'uuid': 'zzzzz-2x53u-382brsig8rp3065',
-                    'hostname': 'testvm2.shell',
-                },
-            ],
-            'items_available': 2,
-        }
-        self.assertFalse(self.attempt())
-
-    def test_server_ignores_vm_filters(self):
-        self.response['virtual_machines'] = {
-            'items': [
-                {
-                    'uuid': 'zzzzz-2x53u-382brsig8rp3065',
-                    'hostname': 'testvm22.shell', # <-----
-                },
-            ],
-            'items_available': 1,
-        }
-        self.assertFalse(self.attempt())
-
-    def test_fail_user_lookup(self):
-        self.api_client.users().current().execute.side_effect = Exception("Test-induced failure")
-        self.assertFalse(self.attempt())
-
-    def test_fail_permission_check(self):
-        self.api_client.links().list().execute.side_effect = Exception("Test-induced failure")
-        self.assertFalse(self.attempt())
-
-    def test_no_login_permission(self):
-        self.response['links'] = {
-            'items': [],
-        }
-        self.assertFalse(self.attempt())
-
-    def test_server_ignores_permission_filters(self):
-        self.response['links'] = {
-            'items': [{
-                'uuid': 'zzzzz-o0j2j-rah2ya1ohx9xaev',
-                'tail_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
-                'head_uuid': 'zzzzz-2x53u-382brsig8rp3065',
-                'link_class': 'permission',
-                'name': 'CANT_login', # <-----
-                'properties': {
-                    'username': 'active',
-                },
-            }],
-        }
-        self.assertFalse(self.attempt())
diff --git a/sdk/pam/tests/test_pam_sm.py b/sdk/pam/tests/test_pam_sm.py
deleted file mode 100644 (file)
index 53597c0..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import arvados_pam
-import mock
-from . import mocker
-
-class PamSMTest(mocker.Mocker):
-    def attempt(self):
-        return arvados_pam.pam_sm_authenticate(self.pamh, 0, self.argv)
-
-    def test_success(self):
-        self.assertEqual(self.pamh.PAM_SUCCESS, self.attempt())
-
-    def test_bad_user(self):
-        self.pamh.get_user = mock.MagicMock(return_value='badusername')
-        self.assertEqual(self.pamh.PAM_AUTH_ERR, self.attempt())
-
-    def test_bad_vm(self):
-        self.argv[2] = 'testvm22.shell'
-        self.assertEqual(self.pamh.PAM_AUTH_ERR, self.attempt())
-
-    def setUp(self):
-        super(PamSMTest, self).setUp()
-        self.pamh = mock.MagicMock()
-        self.pamh.get_user = mock.MagicMock(return_value='active')
-        self.pamh.PAM_SUCCESS = 12345
-        self.pamh.PAM_AUTH_ERR = 54321
-        self.argv = [__file__, 'zzzzz.arvadosapi.com', 'testvm2.shell']
index 793e981b505abb63cef5730b5f299afaf42564a6..0962779658358c95006435c8566f1c8d3c1e97fd 100644 (file)
@@ -7,7 +7,6 @@
 
 # Sensitive files and local configuration
 /config/database.yml
-/config/initializers/omniauth.rb
 /config/application.yml
 
 # asset cache
@@ -35,4 +34,4 @@
 /package-build.version
 
 # Debugger history
-.byebug_history
\ No newline at end of file
+.byebug_history
index 18797d69c68e6fc0d9d39550a86c3a2ba916cb24..1e12d6a4ce790ec9f9abdfe77ee08044795f8a71 100644 (file)
@@ -4,12 +4,11 @@
 
 source 'https://rubygems.org'
 
-gem 'rails', '~> 5.0.0'
+gem 'rails', '~> 5.2.0'
 gem 'responders', '~> 2.0'
 
 group :test, :development do
   gem 'factory_bot_rails'
-  gem 'database_cleaner'
 
   # As of now (2019-03-27) there's an open issue about incompatibilities with
   # newer versions of this gem: https://github.com/rails/rails-perftest/issues/38
@@ -23,8 +22,12 @@ group :test, :development do
   gem 'simplecov-rcov', require: false
   gem 'mocha', require: false
   gem 'byebug'
+  gem 'listen'
 end
 
+# Fast app boot times
+gem 'bootsnap', require: false
+
 gem 'pg', '~> 1.0'
 
 gem 'multi_json'
index 127a09ee2db71a00bc7c05ee5e2e651ea379a33d..4279151899da9a0051e8e69476f9f4abee672803 100644 (file)
@@ -22,39 +22,43 @@ GIT
 GEM
   remote: https://rubygems.org/
   specs:
-    actioncable (5.0.7.2)
-      actionpack (= 5.0.7.2)
-      nio4r (>= 1.2, < 3.0)
-      websocket-driver (~> 0.6.1)
-    actionmailer (5.0.7.2)
-      actionpack (= 5.0.7.2)
-      actionview (= 5.0.7.2)
-      activejob (= 5.0.7.2)
+    actioncable (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      nio4r (~> 2.0)
+      websocket-driver (>= 0.6.1)
+    actionmailer (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      actionview (= 5.2.4.3)
+      activejob (= 5.2.4.3)
       mail (~> 2.5, >= 2.5.4)
       rails-dom-testing (~> 2.0)
-    actionpack (5.0.7.2)
-      actionview (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
-      rack (~> 2.0)
-      rack-test (~> 0.6.3)
+    actionpack (5.2.4.3)
+      actionview (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
+      rack (~> 2.0, >= 2.0.8)
+      rack-test (>= 0.6.3)
       rails-dom-testing (~> 2.0)
       rails-html-sanitizer (~> 1.0, >= 1.0.2)
-    actionview (5.0.7.2)
-      activesupport (= 5.0.7.2)
+    actionview (5.2.4.3)
+      activesupport (= 5.2.4.3)
       builder (~> 3.1)
-      erubis (~> 2.7.0)
+      erubi (~> 1.4)
       rails-dom-testing (~> 2.0)
       rails-html-sanitizer (~> 1.0, >= 1.0.3)
-    activejob (5.0.7.2)
-      activesupport (= 5.0.7.2)
+    activejob (5.2.4.3)
+      activesupport (= 5.2.4.3)
       globalid (>= 0.3.6)
-    activemodel (5.0.7.2)
-      activesupport (= 5.0.7.2)
-    activerecord (5.0.7.2)
-      activemodel (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
-      arel (~> 7.0)
-    activesupport (5.0.7.2)
+    activemodel (5.2.4.3)
+      activesupport (= 5.2.4.3)
+    activerecord (5.2.4.3)
+      activemodel (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
+      arel (>= 9.0)
+    activestorage (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      activerecord (= 5.2.4.3)
+      marcel (~> 0.3.1)
+    activesupport (5.2.4.3)
       concurrent-ruby (~> 1.0, >= 1.0.2)
       i18n (>= 0.7, < 2)
       minitest (~> 5.1)
@@ -66,9 +70,9 @@ GEM
     addressable (2.7.0)
       public_suffix (>= 2.0.2, < 5.0)
     andand (1.3.3)
-    arel (7.1.4)
-    arvados-google-api-client (0.8.7.3)
-      activesupport (>= 3.2, < 5.1)
+    arel (9.0.0)
+    arvados-google-api-client (0.8.7.4)
+      activesupport (>= 3.2, < 5.3)
       addressable (~> 2.3)
       autoparse (~> 0.3)
       extlib (~> 0.9)
@@ -82,7 +86,9 @@ GEM
       addressable (>= 2.3.1)
       extlib (>= 0.9.15)
       multi_json (>= 1.0.0)
-    builder (3.2.3)
+    bootsnap (1.4.7)
+      msgpack (~> 1.0)
+    builder (3.2.4)
     byebug (11.0.1)
     capistrano (2.15.9)
       highline
@@ -90,10 +96,9 @@ GEM
       net-sftp (>= 2.0.0)
       net-ssh (>= 2.0.14)
       net-ssh-gateway (>= 1.1.0)
-    concurrent-ruby (1.1.5)
-    crass (1.0.4)
-    database_cleaner (1.7.0)
-    erubis (2.7.0)
+    concurrent-ruby (1.1.6)
+    crass (1.0.6)
+    erubi (1.9.0)
     execjs (2.7.0)
     extlib (0.9.16)
     factory_bot (5.0.2)
@@ -127,25 +132,32 @@ GEM
     launchy (2.4.3)
       addressable (~> 2.3)
     libv8 (3.16.14.19)
+    listen (3.2.1)
+      rb-fsevent (~> 0.10, >= 0.10.3)
+      rb-inotify (~> 0.9, >= 0.9.10)
     lograge (0.10.0)
       actionpack (>= 4)
       activesupport (>= 4)
       railties (>= 4)
       request_store (~> 1.0)
     logstash-event (1.2.02)
-    loofah (2.2.3)
+    loofah (2.6.0)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
     mail (2.7.1)
       mini_mime (>= 0.1.1)
+    marcel (0.3.3)
+      mimemagic (~> 0.3.2)
     memoist (0.16.2)
     metaclass (0.0.4)
-    method_source (0.9.2)
-    mini_mime (1.0.1)
+    method_source (1.0.0)
+    mimemagic (0.3.5)
+    mini_mime (1.0.2)
     mini_portile2 (2.4.0)
     minitest (5.10.3)
     mocha (1.8.0)
       metaclass (~> 0.0.1)
+    msgpack (1.3.3)
     multi_json (1.14.1)
     multi_xml (0.6.0)
     multipart-post (2.1.1)
@@ -156,8 +168,8 @@ GEM
     net-ssh (5.2.0)
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
-    nio4r (2.3.1)
-    nokogiri (1.10.8)
+    nio4r (2.5.2)
+    nokogiri (1.10.10)
       mini_portile2 (~> 2.4.0)
     oauth2 (1.4.1)
       faraday (>= 0.8, < 0.16.0)
@@ -181,19 +193,20 @@ GEM
     power_assert (1.1.4)
     public_suffix (4.0.3)
     rack (2.2.3)
-    rack-test (0.6.3)
-      rack (>= 1.0)
-    rails (5.0.7.2)
-      actioncable (= 5.0.7.2)
-      actionmailer (= 5.0.7.2)
-      actionpack (= 5.0.7.2)
-      actionview (= 5.0.7.2)
-      activejob (= 5.0.7.2)
-      activemodel (= 5.0.7.2)
-      activerecord (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
+    rack-test (1.1.0)
+      rack (>= 1.0, < 3)
+    rails (5.2.4.3)
+      actioncable (= 5.2.4.3)
+      actionmailer (= 5.2.4.3)
+      actionpack (= 5.2.4.3)
+      actionview (= 5.2.4.3)
+      activejob (= 5.2.4.3)
+      activemodel (= 5.2.4.3)
+      activerecord (= 5.2.4.3)
+      activestorage (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
       bundler (>= 1.3.0)
-      railties (= 5.0.7.2)
+      railties (= 5.2.4.3)
       sprockets-rails (>= 2.0.0)
     rails-controller-testing (1.0.4)
       actionpack (>= 5.0.1.x)
@@ -202,17 +215,17 @@ GEM
     rails-dom-testing (2.0.3)
       activesupport (>= 4.2.0)
       nokogiri (>= 1.6)
-    rails-html-sanitizer (1.0.4)
-      loofah (~> 2.2, >= 2.2.2)
+    rails-html-sanitizer (1.3.0)
+      loofah (~> 2.3)
     rails-observers (0.1.5)
       activemodel (>= 4.0)
     rails-perftest (0.0.7)
-    railties (5.0.7.2)
-      actionpack (= 5.0.7.2)
-      activesupport (= 5.0.7.2)
+    railties (5.2.4.3)
+      actionpack (= 5.2.4.3)
+      activesupport (= 5.2.4.3)
       method_source
       rake (>= 0.8.7)
-      thor (>= 0.18.1, < 2.0)
+      thor (>= 0.19.0, < 2.0)
     rake (13.0.1)
     rb-fsevent (0.10.3)
     rb-inotify (0.9.10)
@@ -263,15 +276,15 @@ GEM
     therubyracer (0.12.3)
       libv8 (~> 3.16.14.15)
       ref
-    thor (0.20.3)
+    thor (1.0.1)
     thread_safe (0.3.6)
     tilt (2.0.8)
-    tzinfo (1.2.6)
+    tzinfo (1.2.7)
       thread_safe (~> 0.1)
     uglifier (2.7.2)
       execjs (>= 0.3.0)
       json (>= 1.8.0)
-    websocket-driver (0.6.5)
+    websocket-driver (0.7.3)
       websocket-extensions (>= 0.1.0)
     websocket-extensions (0.1.5)
 
@@ -282,11 +295,12 @@ DEPENDENCIES
   acts_as_api
   andand
   arvados!
+  bootsnap
   byebug
-  database_cleaner
   factory_bot_rails
   httpclient
   jquery-rails
+  listen
   lograge
   logstash-event
   minitest (= 5.10.3)
@@ -298,7 +312,7 @@ DEPENDENCIES
   optimist
   passenger
   pg (~> 1.0)
-  rails (~> 5.0.0)
+  rails (~> 5.2.0)
   rails-controller-testing
   rails-observers
   rails-perftest
@@ -317,4 +331,4 @@ DEPENDENCIES
   uglifier (~> 2.0)
 
 BUNDLED WITH
-   1.16.6
+   1.17.3
index 83a233cd54681b18b9fb6bb12c72642a2e95cae4..2644a06579787082d8e1c7421a5288a085450684 100644 (file)
@@ -63,7 +63,6 @@ class ApplicationController < ActionController::Base
                 :with => :render_error)
     rescue_from(ActiveRecord::RecordNotFound,
                 ActionController::RoutingError,
-                ActionController::UnknownController,
                 AbstractController::ActionNotFound,
                 :with => :render_not_found)
   end
@@ -361,7 +360,7 @@ class ApplicationController < ActionController::Base
     %w(created_at modified_by_client_uuid modified_by_user_uuid modified_at).each do |x|
       @attrs.delete x.to_sym
     end
-    @attrs = @attrs.symbolize_keys if @attrs.is_a? HashWithIndifferentAccess
+    @attrs = @attrs.symbolize_keys if @attrs.is_a? ActiveSupport::HashWithIndifferentAccess
     @attrs
   end
 
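The symbolize_keys guard above switches to the namespaced constant because Rails 5.1 deprecated the bare top-level HashWithIndifferentAccess. A minimal sketch (not from the commit, assuming activesupport is available) of the behavior being checked:

    require 'active_support/all'
    h = ActiveSupport::HashWithIndifferentAccess.new(foo: 1)
    h[:foo] == h['foo']    # => true, indifferent access
    h.symbolize_keys       # => {:foo=>1}, a plain Hash with symbol keys
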
index 582b98cf2dc9d9e20b88cf0180b7a9db19fbfd8f..8e3c3ac5e3d8b8656d587e86626f86f57c33b045 100644 (file)
@@ -147,10 +147,15 @@ class UserSessionsController < ApplicationController
         find_or_create_by(url_prefix: api_client_url_prefix)
     end
 
+    token_expiration = nil
+    if Rails.configuration.Login.TokenLifetime > 0
+      token_expiration = Time.now + Rails.configuration.Login.TokenLifetime
+    end
     @api_client_auth = ApiClientAuthorization.
       new(user: user,
           api_client: @api_client,
           created_by_ip_address: remote_ip,
+          expires_at: token_expiration,
           scopes: ["all"])
     @api_client_auth.save!
 
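With the new Login.TokenLifetime (declared further down as an ActiveSupport::Duration), a nonzero value stamps each session token with an expiry. A sketch of the arithmetic, assuming activesupport is loaded; 336.hours stands in for a configured lifetime and is illustrative only:

    require 'active_support/all'
    lifetime = 336.hours                                # e.g. Login.TokenLifetime: 336h
    token_expiration = lifetime > 0 ? Time.now + lifetime : nil
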
index 8ed693f820d5eac0eff9389ac851166e800d6516..c6c48a5b6b13c803d8d54d660a2d8fbd2a265740 100644 (file)
@@ -15,13 +15,16 @@ class ApiClient < ArvadosModel
   end
 
   def is_trusted
-    norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench1.ExternalURL) ||
-      norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench2.ExternalURL) ||
-      super
+    (from_trusted_url && Rails.configuration.Login.TokenLifetime == 0) || super
   end
 
   protected
 
+  def from_trusted_url
+    norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench1.ExternalURL) ||
+      norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench2.ExternalURL)
+  end
+
   def norm url
     # normalize URL for comparison
     url = URI(url)
index 6057c4d2698c8e1bb3d131d7dfcd9d0a8c85ea0d..a4d49c35c1fc4c73c490375921c7b2bf3a94c97b 100644 (file)
@@ -325,6 +325,7 @@ class ApiClientAuthorization < ArvadosModel
   end
 
   def log_update
-    super unless (changed - UNLOGGED_CHANGES).empty?
+
+    super unless (saved_changes.keys - UNLOGGED_CHANGES).empty?
   end
 end
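This hunk, and the ones that follow in Container, ContainerRequest, Group, Node, and User, adapt callbacks to the ActiveRecord 5.1+ dirty-tracking API: inside after_save/after_update, changed, *_changed?, and *_was now describe pending (unsaved) changes, so the just-saved state is read via saved_changes, saved_change_to_*?, and *_before_last_save. A minimal runnable sketch of the renamed accessors (not from the commit; assumes the activerecord and sqlite3 gems):

    require 'active_record'

    ActiveRecord::Base.establish_connection(adapter: 'sqlite3', database: ':memory:')
    ActiveRecord::Schema.define { create_table(:jobs) { |t| t.integer :priority } }

    class Job < ActiveRecord::Base
      after_update :report
      def report
        puts saved_change_to_priority?   # => true          (was: priority_changed?)
        puts priority_before_last_save   # => 1             (was: priority_was)
        puts saved_changes.keys.inspect  # => ["priority"]  (was: changed)
      end
    end

    Job.create!(priority: 1).update!(priority: 5)
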
index 67794208de7c999c7b8b0b3a8c451f2b7bb36c57..6fb8ff2b33549af8e4e512a1374363f8dee8fa64 100644 (file)
@@ -16,6 +16,7 @@ class ArvadosModel < ApplicationRecord
   include DbCurrentTime
   extend RecordFilters
 
+  after_find :schedule_restoring_changes
   after_initialize :log_start_state
   before_save :ensure_permission_to_save
   before_save :ensure_owner_uuid_is_permitted
@@ -137,6 +138,7 @@ class ArvadosModel < ApplicationRecord
   def reload(*args)
     super
     log_start_state
+    self
   end
 
   def self.create raw_params={}, *args
@@ -838,10 +840,24 @@ class ArvadosModel < ApplicationRecord
              Rails.configuration.AuditLogs.MaxDeleteBatch.to_i > 0)
   end
 
+  def schedule_restoring_changes
+    # This will be checked at log_start_state, to reset any (virtual) changes
+    # produced by the act of reading a serialized attribute.
+    @fresh_from_database = true
+  end
+
   def log_start_state
     if is_audit_logging_enabled?
       @old_attributes = Marshal.load(Marshal.dump(attributes))
       @old_logged_attributes = Marshal.load(Marshal.dump(logged_attributes))
+      if @fresh_from_database
+        # This instance was created from reading a database record. Attributes
+        # haven't been changed, but those serialized attributes will be reported
+        # as unpersisted, so we restore them to avoid issues with lock!() and
+        # with_lock().
+        restore_attributes
+        @fresh_from_database = nil
+      end
     end
   end
 
index caac5611e79c8baa43d30e396b33cc4a92f9d146..8b549a71ab4fba348ab9279f456595912fb693db 100644 (file)
@@ -259,9 +259,10 @@ class Collection < ArvadosModel
     should_preserve_version = should_preserve_version? # Time sensitive, cache value
     return(yield) unless (should_preserve_version || syncable_updates.any?)
 
-    # Put aside the changes because with_lock forces a record reload
+    # Put aside the changes because with_lock does a record reload
     changes = self.changes
     snapshot = nil
+    restore_attributes
     with_lock do
       # Copy the original state to save it as old version
       if should_preserve_version
@@ -303,12 +304,18 @@ class Collection < ArvadosModel
 
   def syncable_updates
     updates = {}
-    (syncable_attrs & self.changes.keys).each do |attr|
+    if self.changes.any?
+      changes = self.changes
+    else
+      # If called after save...
+      changes = self.saved_changes
+    end
+    (syncable_attrs & changes.keys).each do |attr|
       if attr == 'uuid'
         # Point old versions to current version's new UUID
-        updates['current_version_uuid'] = self.changes[attr].last
+        updates['current_version_uuid'] = changes[attr].last
       else
-        updates[attr] = self.changes[attr].last
+        updates[attr] = changes[attr].last
       end
     end
     return updates
@@ -316,7 +323,7 @@ class Collection < ArvadosModel
 
   def sync_past_versions
     updates = self.syncable_updates
-    Collection.where('current_version_uuid = ? AND uuid != ?', self.uuid_was, self.uuid_was).each do |c|
+    Collection.where('current_version_uuid = ? AND uuid != ?', self.uuid_before_last_save, self.uuid_before_last_save).each do |c|
       c.attributes = updates
       # Use a different validation context to skip the 'past_versions_cannot_be_updated'
       # validator, as on this case it is legal to update some fields.
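The capture-restore-reapply dance above exists because with_lock reloads the record, discarding unsaved attributes, and (per the ArvadosModel comment earlier) lock!/with_lock have issues with records carrying unpersisted changes. A sketch of the pattern, reusing the hypothetical Job model from the dirty-API example above:

    job = Job.create!(priority: 1)
    job.priority = 9                      # pending, unsaved change
    pending = job.changes                 # => {"priority"=>[1, 9]}
    job.restore_attributes                # drop it so the lock can reload cleanly
    job.with_lock do                      # row lock + reload
      job.update!(priority: pending['priority'].last)
    end
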
index 912a801a6fb1820724489216f0ec38d99bd80210..5833c2251f9b8db26a5ebf5834130d96fc4690d0 100644 (file)
@@ -138,7 +138,7 @@ class Container < ArvadosModel
   end
 
   def propagate_priority
-    return true unless priority_changed?
+    return true unless saved_change_to_priority?
     act_as_system_user do
       # Update the priority of child container requests to match new
       # priority of the parent container (ignoring requests with no
@@ -387,7 +387,7 @@ class Container < ArvadosModel
     if users_list.select { |u| u.is_admin }.any?
       return super
     end
-    Container.where(ContainerRequest.readable_by(*users_list).where("containers.uuid = container_requests.container_uuid").exists)
+    Container.where(ContainerRequest.readable_by(*users_list).where("containers.uuid = container_requests.container_uuid").arel.exists)
   end
 
   def final?
@@ -556,7 +556,7 @@ class Container < ArvadosModel
     # If self.final?, this update is superfluous: the final log/output
     # update will be done when handle_completed calls finalize! on
     # each requesting CR.
-    return if self.final? || !self.log_changed?
+    return if self.final? || !saved_change_to_log?
     leave_modified_by_user_alone do
       ContainerRequest.where(container_uuid: self.uuid).each do |cr|
         cr.update_collections(container: self, collections: ['log'])
@@ -653,11 +653,11 @@ class Container < ArvadosModel
   def handle_completed
     # This container is finished so finalize any associated container requests
     # that are associated with this container.
-    if self.state_changed? and self.final?
+    if saved_change_to_state? and self.final?
       # These get wiped out by with_lock (which reloads the record),
       # so record them now in case we need to schedule a retry.
-      prev_secret_mounts = self.secret_mounts_was
-      prev_runtime_token = self.runtime_token_was
+      prev_secret_mounts = secret_mounts_before_last_save
+      prev_runtime_token = runtime_token_before_last_save
 
       # Need to take a lock on the container to ensure that any
       # concurrent container requests that might try to reuse this
index b30b8cc1d9b24cc2bfcbeac7400afaa38cd03fa4..77536eee4f28f53a2acae66cc90d647967ff6b51 100644 (file)
@@ -472,10 +472,10 @@ class ContainerRequest < ArvadosModel
   end
 
   def update_priority
-    return unless state_changed? || priority_changed? || container_uuid_changed?
+    return unless saved_change_to_state? || saved_change_to_priority? || saved_change_to_container_uuid?
     act_as_system_user do
       Container.
-        where('uuid in (?)', [self.container_uuid_was, self.container_uuid].compact).
+        where('uuid in (?)', [container_uuid_before_last_save, self.container_uuid].compact).
         map(&:update_priority!)
     end
   end
index 02c6a242f911ddcaebd3a4ae68113c546d5487bd..7e015f3564e7475f6103e8f4a42c5beb5bf53c83 100644 (file)
@@ -57,7 +57,7 @@ class Group < ArvadosModel
   end
 
   def update_trash
-    if trash_at_changed? or owner_uuid_changed?
+    if saved_change_to_trash_at? or saved_change_to_owner_uuid?
       # The group was added or removed from the trash.
       #
       # Strategy:
@@ -97,7 +97,7 @@ on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at;
   end
 
   def after_ownership_change
-    if owner_uuid_changed?
+    if saved_change_to_owner_uuid?
       update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
     end
   end
index d200bb80110869ade17386d3ebbac9cf9b8de979..c8b463696bb5423b1d5a5f7f5533b95637246165 100644 (file)
@@ -168,7 +168,7 @@ class Node < ArvadosModel
   end
 
   def dns_server_update
-    if ip_address_changed? && ip_address
+    if saved_change_to_ip_address? && ip_address
       Node.where('id != ? and ip_address = ?',
                  id, ip_address).each do |stale_node|
         # One or more(!) stale node records have the same IP address
@@ -178,10 +178,10 @@ class Node < ArvadosModel
         stale_node.update_attributes!(ip_address: nil)
       end
     end
-    if hostname_was && hostname_changed?
-      self.class.dns_server_update(hostname_was, UNUSED_NODE_IP)
+    if hostname_before_last_save && saved_change_to_hostname?
+      self.class.dns_server_update(hostname_before_last_save, UNUSED_NODE_IP)
     end
-    if hostname && (hostname_changed? || ip_address_changed?)
+    if hostname && (saved_change_to_hostname? || saved_change_to_ip_address?)
       self.class.dns_server_update(hostname, ip_address || UNUSED_NODE_IP)
     end
   end
index 64facaa98e84c2eacfdc6fed38372f2dff22fdde..778ad7d0bb1728c22ad45dcfecdc5264f1c65312 100644 (file)
@@ -23,32 +23,32 @@ class User < ArvadosModel
   validate :must_unsetup_to_deactivate
   before_update :prevent_privilege_escalation
   before_update :prevent_inactive_admin
-  before_update :verify_repositories_empty, :if => Proc.new { |user|
-    user.username.nil? and user.username_changed?
+  before_update :verify_repositories_empty, :if => Proc.new {
+    username.nil? and username_changed?
   }
   before_update :setup_on_activate
 
   before_create :check_auto_admin
-  before_create :set_initial_username, :if => Proc.new { |user|
-    user.username.nil? and user.email
+  before_create :set_initial_username, :if => Proc.new {
+    username.nil? and email
   }
   after_create :after_ownership_change
   after_create :setup_on_activate
   after_create :add_system_group_permission_link
-  after_create :auto_setup_new_user, :if => Proc.new { |user|
+  after_create :auto_setup_new_user, :if => Proc.new {
     Rails.configuration.Users.AutoSetupNewUsers and
-    (user.uuid != system_user_uuid) and
-    (user.uuid != anonymous_user_uuid)
+    (uuid != system_user_uuid) and
+    (uuid != anonymous_user_uuid)
   }
   after_create :send_admin_notifications
 
   before_update :before_ownership_change
   after_update :after_ownership_change
   after_update :send_profile_created_notification
-  after_update :sync_repository_names, :if => Proc.new { |user|
-    (user.uuid != system_user_uuid) and
-    user.username_changed? and
-    (not user.username_was.nil?)
+  after_update :sync_repository_names, :if => Proc.new {
+    (uuid != system_user_uuid) and
+    saved_change_to_username? and
+    (not username_before_last_save.nil?)
   }
   before_destroy :clear_permissions
   after_destroy :remove_self_from_permissions
@@ -151,7 +151,7 @@ SELECT 1 FROM #{PERMISSION_VIEW}
   end
 
   def after_ownership_change
-    if owner_uuid_changed?
+    if saved_change_to_owner_uuid?
       update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
     end
   end
@@ -241,11 +241,8 @@ SELECT target_uuid, perm_level
                      name: 'can_login').destroy_all
 
     # delete "All users" group read permissions for this user
-    group = Group.where(name: 'All users').select do |g|
-      g[:uuid].match(/-f+$/)
-    end.first
     Link.where(tail_uuid: self.uuid,
-                     head_uuid: group[:uuid],
+                     head_uuid: all_users_group_uuid,
                      link_class: 'permission',
                      name: 'can_read').destroy_all
 
@@ -272,10 +269,6 @@ SELECT target_uuid, perm_level
        self.is_active_was &&
        !self.is_active
 
-      group = Group.where(name: 'All users').select do |g|
-        g[:uuid].match(/-f+$/)
-      end.first
-
       # When a user is set up, they are added to the "All users"
       # group.  A user that is part of the "All users" group is
       # allowed to self-activate.
@@ -290,7 +283,7 @@ SELECT target_uuid, perm_level
       # explaining the correct way to deactivate a user.
       #
       if Link.where(tail_uuid: self.uuid,
-                    head_uuid: group[:uuid],
+                    head_uuid: all_users_group_uuid,
                     link_class: 'permission',
                     name: 'can_read').any?
         errors.add :is_active, "cannot be set to false directly, use the 'Deactivate' button on Workbench, or the 'unsetup' API call"
@@ -711,11 +704,11 @@ update #{PERMISSION_VIEW} set target_uuid=$1 where target_uuid = $2
   # add the user to the 'All users' group
   def create_user_group_link
     return (Link.where(tail_uuid: self.uuid,
-                       head_uuid: all_users_group[:uuid],
+                       head_uuid: all_users_group_uuid,
                        link_class: 'permission',
                        name: 'can_read').first or
             Link.create(tail_uuid: self.uuid,
-                        head_uuid: all_users_group[:uuid],
+                        head_uuid: all_users_group_uuid,
                         link_class: 'permission',
                         name: 'can_read'))
   end
@@ -743,7 +736,8 @@ update #{PERMISSION_VIEW} set target_uuid=$1 where target_uuid = $2
   # Automatically setup if is_active flag turns on
   def setup_on_activate
     return if [system_user_uuid, anonymous_user_uuid].include?(self.uuid)
-    if is_active && (new_record? || is_active_changed?)
+    if is_active &&
+      (new_record? || saved_change_to_is_active? || will_save_change_to_is_active?)
       setup
     end
   end
@@ -766,8 +760,8 @@ update #{PERMISSION_VIEW} set target_uuid=$1 where target_uuid = $2
 
   # Send notification if the user saved profile for the first time
   def send_profile_created_notification
-    if self.prefs_changed?
-      if self.prefs_was.andand.empty? || !self.prefs_was.andand['profile']
+    if saved_change_to_prefs?
+      if prefs_before_last_save.andand.empty? || !prefs_before_last_save.andand['profile']
         profile_notification_address = Rails.configuration.Users.UserProfileNotificationAddress
         ProfileNotifier.profile_created(self, profile_notification_address).deliver_now if profile_notification_address and !profile_notification_address.empty?
       end
@@ -782,7 +776,7 @@ update #{PERMISSION_VIEW} set target_uuid=$1 where target_uuid = $2
   end
 
   def sync_repository_names
-    old_name_re = /^#{Regexp.escape(username_was)}\//
+    old_name_re = /^#{Regexp.escape(username_before_last_save)}\//
     name_sub = "#{username}/"
     repositories.find_each do |repo|
       repo.name = repo.name.sub(old_name_re, name_sub)
index 044b5ca2318afe4f90c913d94cadf9ab5ddf7964..00d640cf7cf156097b9739a34f71e65eb284d48d 100755 (executable)
@@ -4,5 +4,5 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
 load Gem.bin_path('bundler', 'bundle')
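bin/bundle here, and bin/setup and bin/update just below, swap __FILE__-relative paths for __dir__-relative ones; __dir__ is already the script's directory, so one fewer ../ hop is needed. Both forms resolve to the same file:

    File.expand_path('../Gemfile', __dir__)       # __dir__  == <app>/bin
    File.expand_path('../../Gemfile', __FILE__)   # __FILE__ == <app>/bin/bundle
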
index 2e4d28c58d85e8640cf46a2b11a9e112575c7c13..c9142b942ed12a848a4497a01ad7393dfd78d370 100755 (executable)
@@ -4,12 +4,11 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-require 'pathname'
 require 'fileutils'
 include FileUtils
 
 # path to your application root.
-APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
+APP_ROOT = File.expand_path('..', __dir__)
 
 def system!(*args)
   system(*args) || abort("\n== Command #{args} failed ==")
index 07a3df93e48b0b2eaacc35e59683caefe9ff2efb..201287ef61e8859930cb93cc03cb81f20c12b4ff 100755 (executable)
@@ -4,12 +4,11 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-require 'pathname'
 require 'fileutils'
 include FileUtils
 
 # path to your application root.
-APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
+APP_ROOT = File.expand_path('..', __dir__)
 
 def system!(*args)
   system(*args) || abort("\n== Command #{args} failed ==")
diff --git a/services/api/bin/yarn b/services/api/bin/yarn
new file mode 100755 (executable)
index 0000000..cc54a3b
--- /dev/null
@@ -0,0 +1,16 @@
+#!/usr/bin/env ruby
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+APP_ROOT = File.expand_path('..', __dir__)
+Dir.chdir(APP_ROOT) do
+  begin
+    exec "yarnpkg", *ARGV
+  rescue Errno::ENOENT
+    $stderr.puts "Yarn executable was not detected in the system."
+    $stderr.puts "Download Yarn at https://yarnpkg.com/en/docs/install"
+    exit 1
+  end
+end
index b6174a0d8989f36e2e851431b18fe1627a33dbb8..b28ae0e0718e2ddabc472f61be8cf8c07a53232f 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-require File.expand_path('../boot', __FILE__)
+require_relative 'boot'
 
 require "rails"
 # Pick only the frameworks we need:
@@ -12,10 +12,11 @@ require "active_record/railtie"
 require "action_controller/railtie"
 require "action_mailer/railtie"
 require "action_view/railtie"
-# Skip ActionCable (new in Rails 5.0) as it adds '/cable' routes that we're not using
-# require "action_cable/engine"
 require "sprockets/railtie"
 require "rails/test_unit/railtie"
+# Skipping the following:
+# * ActionCable (new in Rails 5.0) as it adds '/cable' routes that we're not using
+# * ActiveStorage (new in Rails 5.1)
 
 require 'digest'
 
index 035a3972f86c318e758318330c7aa63af44ff9c5..4f831160e9351790143505cc16447e6d0507b0e3 100644 (file)
@@ -111,6 +111,7 @@ arvcfg.declare_config "Login.SSO.ProviderAppSecret", String, :sso_app_secret
 arvcfg.declare_config "Login.SSO.ProviderAppID", String, :sso_app_id
 arvcfg.declare_config "Login.LoginCluster", String
 arvcfg.declare_config "Login.RemoteTokenRefresh", ActiveSupport::Duration
+arvcfg.declare_config "Login.TokenLifetime", ActiveSupport::Duration
 arvcfg.declare_config "TLS.Insecure", Boolean, :sso_insecure
 arvcfg.declare_config "Services.SSO.ExternalURL", String, :sso_provider_url
 arvcfg.declare_config "AuditLogs.MaxAge", ActiveSupport::Duration, :max_audit_log_age
index 717101c2b2b6ccbacb9e01c587195b38e1bd8bb4..9605b584e9b4c94f42753fd58ac95fb35a04b048 100644 (file)
@@ -5,4 +5,5 @@
 # Set up gems listed in the Gemfile.
 ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
 
-require 'bundler/setup'
+require 'bundler/setup' # Set up gems listed in the Gemfile.
+require 'bootsnap/setup' # Speed up boot time by caching expensive operations.
\ No newline at end of file
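bootsnap/setup is required right after bundler so every later require benefits from its caches (the Gemfile change above adds bootsnap with require: false for exactly this manual, early require). A rough sketch of what it configures, assuming bootsnap 1.4.x defaults; the paths are illustrative:

    require 'bootsnap'
    Bootsnap.setup(
      cache_dir: 'tmp/cache',       # where the cache files live
      load_path_cache: true,        # cache $LOAD_PATH resolution for require
      compile_cache_iseq: true,     # cache compiled Ruby bytecode
      compile_cache_yaml: true      # cache YAML.load_file parses
    )
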
index 56a4ed6dcd9ecad7b92ccdbd18fb28633acb869c..f5ab77a4df285283dab8e2c3ef1f0fe35b7da2d4 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-Server::Application.configure do
+Rails.application.configure do
   # Settings specified here will take precedence over those in config/application.rb
 
   # In the development environment your application's code is reloaded on
index 6c48dcd0196209f3b16a31f64f48ad93fa06244b..c8194057ccfc731d5fbf91b2fdfd55d0c417f812 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-Server::Application.configure do
+Rails.application.configure do
   # Settings specified here will take precedence over those in config/application.rb
 
   # Code is not reloaded between requests
index 6b550587cbb28b95d7b07bf1f0841afe6ec5bdc4..9cdf5d9cd137aa0342a932c6c875c8a17b4f2ae7 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-Server::Application.configure do
+Rails.application.configure do
   # Settings specified here will take precedence over those in config/application.rb
 
   # The test environment is used exclusively to run your application's
diff --git a/services/api/config/initializers/content_security_policy.rb b/services/api/config/initializers/content_security_policy.rb
new file mode 100644 (file)
index 0000000..853ecde
--- /dev/null
@@ -0,0 +1,29 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+
+# Define an application-wide content security policy
+# For further information see the following documentation
+# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy
+
+# Rails.application.config.content_security_policy do |policy|
+#   policy.default_src :self, :https
+#   policy.font_src    :self, :https, :data
+#   policy.img_src     :self, :https, :data
+#   policy.object_src  :none
+#   policy.script_src  :self, :https
+#   policy.style_src   :self, :https
+
+#   # Specify URI for violation reports
+#   # policy.report_uri "/csp-violation-report-endpoint"
+# end
+
+# If you are using UJS then enable automatic nonce generation
+# Rails.application.config.content_security_policy_nonce_generator = -> request { SecureRandom.base64(16) }
+
+# Report CSP violations to a specified URI
+# For further information see the following documentation:
+# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only
+# Rails.application.config.content_security_policy_report_only = true
index 8f3b3cb5f8e951df55979a1f74adce8b847de652..2abe40566ecf03cc0d48054b74690c6d1d7048b6 100644 (file)
@@ -8,8 +8,13 @@
 
 require 'enable_jobs_api'
 
-Server::Application.configure do
-  if ActiveRecord::Base.connection.tables.include?('jobs')
-    check_enable_legacy_jobs_api
+Rails.application.configure do
+  begin
+    if ActiveRecord::Base.connection.tables.include?('jobs')
+      check_enable_legacy_jobs_api
+    end
+  rescue ActiveRecord::NoDatabaseError
+    # Since Rails 5.2, initializers are also run by rake tasks (like
+    # db:create), which may execute before the database exists.
+    # See: https://github.com/rails/rails/issues/32870
   end
 end
diff --git a/services/api/config/initializers/new_framework_defaults_5_2.rb b/services/api/config/initializers/new_framework_defaults_5_2.rb
new file mode 100644 (file)
index 0000000..93a8d52
--- /dev/null
@@ -0,0 +1,42 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+#
+# This file contains migration options to ease your Rails 5.2 upgrade.
+#
+# Once upgraded, flip defaults one by one to migrate to the new behavior.
+#
+# Read the Guide for Upgrading Ruby on Rails for more info on each option.
+
+# Make Active Record use stable #cache_key alongside new #cache_version method.
+# This is needed for recyclable cache keys.
+# Rails.application.config.active_record.cache_versioning = true
+
+# Use AES-256-GCM authenticated encryption for encrypted cookies.
+# Also, embed cookie expiry in signed or encrypted cookies for increased security.
+#
+# This option is not backwards compatible with earlier Rails versions.
+# It's best enabled when your entire app is migrated and stable on 5.2.
+#
+# Existing cookies will be converted on read then written with the new scheme.
+# Rails.application.config.action_dispatch.use_authenticated_cookie_encryption = true
+
+# Use AES-256-GCM authenticated encryption as default cipher for encrypting messages
+# instead of AES-256-CBC, when use_authenticated_message_encryption is set to true.
+# Rails.application.config.active_support.use_authenticated_message_encryption = true
+
+# Add default protection from forgery to ActionController::Base instead of in
+# ApplicationController.
+# Rails.application.config.action_controller.default_protect_from_forgery = true
+
+# Store boolean values in sqlite3 databases as 1 and 0 instead of 't' and
+# 'f' after migrating old data.
+# Rails.application.config.active_record.sqlite3.represent_boolean_as_integer = true
+
+# Use SHA-1 instead of MD5 to generate non-sensitive digests, such as the ETag header.
+# Rails.application.config.active_support.use_sha1_digests = true
+
+# Make `form_with` generate id attributes for any generated HTML tags.
+# Rails.application.config.action_view.form_with_generates_ids = true
diff --git a/services/api/config/initializers/preload_all_models.rb b/services/api/config/initializers/preload_all_models.rb
deleted file mode 100644 (file)
index 713c61f..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# See http://aaronvb.com/articles/37-rails-caching-and-undefined-class-module
-
-# Config must be done before we load model class files; otherwise they
-# won't be able to use Rails.configuration.* to initialize their
-# classes.
-
-if Rails.env == 'development'
-  Dir.foreach("#{Rails.root}/app/models") do |model_file|
-    require_dependency model_file if model_file.match(/\.rb$/)
-  end
-end
index cedd8f3e4a325b4e438febdc7d8cc9a7367c1a56..26681d613fa60b1daaa8857bdf4bebe3bd082096 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-ActiveRecord::Base.connection.class.set_callback :checkout, :after do
+ActiveRecord::ConnectionAdapters::AbstractAdapter.set_callback :checkout, :before, ->(conn) do
   # If the database connection is in a time zone other than UTC,
   # "timestamp" values don't behave as desired.
   #
@@ -11,5 +11,5 @@ ActiveRecord::Base.connection.class.set_callback :checkout, :after do
   # before now()), but false in time zone -0100 (now() returns an
   # earlier clock time, and its time zone is dropped when comparing to
   # a "timestamp without time zone").
-  raw_connection.sync_exec("SET TIME ZONE 'UTC'")
+  conn.execute("SET TIME ZONE 'UTC'")
 end
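Because the callback fires on every connection checkout, each pooled connection is pinned to UTC before application code can use it. A quick way to verify the effect from a Rails console (assuming PostgreSQL):

    # Sketch: confirm the session time zone on a freshly checked-out connection.
    tz = ActiveRecord::Base.connection.select_value("SHOW TIME ZONE")
    raise "expected UTC, got #{tz.inspect}" unless tz == "UTC"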
index 976777723a970cf79600b13399f871ee7dafba12..6fb9786504ea5247982f342ac7dfc6d426486b46 100644 (file)
@@ -9,7 +9,7 @@
 
 # Enable parameter wrapping for JSON. You can disable this by setting :format to an empty array.
 ActiveSupport.on_load(:action_controller) do
-  wrap_parameters :format => [:json]
+  wrap_parameters format: [:json]
 end
 
 # Disable root element in JSON by default.
index 8afd22192a62f56c002b363bf63625e07009fcec..69758580356ba771ac05a70e022735fe092962d5 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-Server::Application.routes.draw do
+Rails.application.routes.draw do
   themes_for_rails
 
   # OPTIONS requests are not allowed at routes that use cookies.
diff --git a/services/api/config/secrets.yml b/services/api/config/secrets.yml
new file mode 100644 (file)
index 0000000..293b93b
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+
+# Your secret key is used for verifying the integrity of signed cookies.
+# If you change this key, all old signed cookies will become invalid!
+
+# Make sure the secret is at least 30 characters and all random,
+# no regular words or you'll be exposed to dictionary attacks.
+# You can use `rails secret` to generate a secure secret key.
+
+# NOTE that these get overridden by Arvados' own configuration system.
+
+# shared:
+#   api_key: a1B2c3D4e5F6
+
+# Environmental secrets are only available for that specific environment.
+
+# development:
+#   secret_key_base: <%= rand(1<<255).to_s(36) %>
+
+# test:
+#   secret_key_base: <%= rand(1<<255).to_s(36) %>
+
+# In case this doesn't get overridden for some reason, assign a random key
+# to gracefully degrade by rejecting cookies instead of opening a
+# vulnerability.
+production:
+  secret_key_base: <%= rand(1<<255).to_s(36) %>
index 886c8873891c044270313e3563c73e4fe950c5cb..2b5e3b8abff2d14ddebea3008aa6774280c466f9 100644 (file)
@@ -62,7 +62,12 @@ module AuditLogs
       rescue => e
         Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
       ensure
-        ActiveRecord::Base.connection.close
+        # Rails 5.1+ makes test threads share a database connection, so we can't
+        # close a connection shared with other threads.
+        # https://github.com/rails/rails/commit/deba47799ff905f778e0c98a015789a1327d5087
+        if Rails.env != "test"
+          ActiveRecord::Base.connection.close
+        end
       end
     end
   end
index 8613c749cf247c6c11f309c4d43cddc544e99b4f..c09896567f3ac1291d8cbe0632393ac60d2ac8fc 100644 (file)
@@ -69,7 +69,12 @@ module SweepTrashedObjects
         rescue => e
           Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
         ensure
-          ActiveRecord::Base.connection.close
+          # Rails 5.1+ makes test threads share a database connection, so we can't
+          # close a connection shared with other threads.
+          # https://github.com/rails/rails/commit/deba47799ff905f778e0c98a015789a1327d5087
+          if Rails.env != "test"
+            ActiveRecord::Base.connection.close
+          end
         end
       end
     end
diff --git a/services/api/lib/tasks/manage_long_lived_tokens.rake b/services/api/lib/tasks/manage_long_lived_tokens.rake
new file mode 100644 (file)
index 0000000..7bcf315
--- /dev/null
@@ -0,0 +1,61 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tasks that can be useful when changing token expiration policies, i.e. when
+# assigning a non-zero value to the Login.TokenLifetime config.
+
+require 'set'
+require 'current_api_client'
+
+namespace :db do
+  desc "Apply expiration policy on long lived tokens"
+  task fix_long_lived_tokens: :environment do
+    if Rails.configuration.Login.TokenLifetime == 0
+      puts("No expiration policy set on Login.TokenLifetime.")
+    else
+      exp_date = Time.now + Rails.configuration.Login.TokenLifetime
+      puts("Setting token expiration to: #{exp_date}")
+      token_count = 0
+      ll_tokens.each do |auth|
+        if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+          CurrentApiClientHelper.act_as_system_user do
+            auth.update_attributes!(expires_at: exp_date)
+          end
+          token_count += 1
+        end
+      end
+      puts("#{token_count} tokens updated.")
+    end
+  end
+
+  desc "Show users with long lived tokens"
+  task check_long_lived_tokens: :environment do
+    user_ids = Set.new()
+    token_count = 0
+    ll_tokens.each do |auth|
+      if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+        user_ids.add(auth.user_id)
+        token_count += 1
+      end
+    end
+
+    if user_ids.size > 0
+      puts("Found #{token_count} long-lived tokens from users:")
+      user_ids.each do |uid|
+        u = User.find(uid)
+        puts("#{u.username},#{u.email},#{u.uuid}") if !u.nil?
+      end
+    else
+      puts("No long-lived tokens found.")
+    end
+  end
+
+  def ll_tokens
+    query = ApiClientAuthorization.where(expires_at: nil)
+    if Rails.configuration.Login.TokenLifetime > 0
+      query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + Rails.configuration.Login.TokenLifetime))
+    end
+    query
+  end
+end
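With a standard Rails setup the tasks above can be driven through rake; the check task is read-only, while the fix task rewrites expires_at on matching tokens:

    # Sketch: invoking the tasks from a shell.
    #   bundle exec rake db:check_long_lived_tokens   # report affected users
    #   bundle exec rake db:fix_long_lived_tokens     # apply the expiry policy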
index c688ac008b44b21944e86b36cdb3abbb15273e12..6c17f1bd03bf5bae3d1dbd9a2a9e4123ee99b715 100644 (file)
@@ -33,7 +33,7 @@ module UpdatePriority
       # priority==0 but should be >0:
       act_as_system_user do
         Container.
-          joins("JOIN container_requests ON container_requests.container_uuid=containers.uuid AND container_requests.state=#{Container.sanitize(ContainerRequest::Committed)} AND container_requests.priority>0").
+          joins("JOIN container_requests ON container_requests.container_uuid=containers.uuid AND container_requests.state=#{ActiveRecord::Base.connection.quote(ContainerRequest::Committed)} AND container_requests.priority>0").
           where('containers.state IN (?) AND containers.priority=0 AND container_requests.uuid IS NOT NULL',
                 [Container::Queued, Container::Locked, Container::Running]).
           map(&:update_priority!)
@@ -55,7 +55,12 @@ module UpdatePriority
       rescue => e
         Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
       ensure
-        ActiveRecord::Base.connection.close
+        # Rails 5.1+ makes test threads share a database connection, so we can't
+        # close a connection shared with other threads.
+        # https://github.com/rails/rails/commit/deba47799ff905f778e0c98a015789a1327d5087
+        if Rails.env != "test"
+          ActiveRecord::Base.connection.close
+        end
       end
     end
   end
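Model-level sanitize was deprecated and later removed from Rails, so literals interpolated into raw SQL fragments now go through the connection's quoting, as above. A minimal sketch of the pattern (table and column names are hypothetical):

    # Sketch: quoting a Ruby literal before embedding it in a SQL string.
    state = ActiveRecord::Base.connection.quote("Committed")  # => "'Committed'"
    join_sql = "JOIN container_requests ON container_requests.state=#{state}"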
index ce1d447f16ad0f950327ecfa1e47f7cb24fcd76f..0fbc7625ceb0d985d4c26d10e9cc2b636574378e 100644 (file)
@@ -50,8 +50,7 @@ class Arvados::V1::KeepServicesControllerTest < ActionController::TestCase
     refute_empty expect_rvz
     authorize_with :active
     get :index,
-      params: {:format => :json},
-      headers: auth(:active)
+      params: {:format => :json}
     assert_response :success
     json_response['items'].each do |svc|
       url = "#{svc['service_ssl_flag'] ? 'https' : 'http'}://#{svc['service_host']}:#{svc['service_port']}/"
index fc9475692a5933c2ed01f77e7871f4fd3942d7ec..cd475dea4d1849f6d99374fa3976068767ef1fcb 100644 (file)
@@ -14,7 +14,6 @@ class UserSessionsControllerTest < ActionController::TestCase
     assert_nil assigns(:api_client)
   end
 
-
   test "send token when user is already logged in" do
     authorize_with :inactive
     api_client_page = 'http://client.example.com/home'
@@ -26,6 +25,28 @@ class UserSessionsControllerTest < ActionController::TestCase
     assert_not_nil assigns(:api_client)
   end
 
+  test "login creates token without expiration by default" do
+    assert_equal 0, Rails.configuration.Login.TokenLifetime
+    authorize_with :inactive
+    api_client_page = 'http://client.example.com/home'
+    get :login, params: {return_to: api_client_page}
+    assert_not_nil assigns(:api_client)
+    assert_nil assigns(:api_client_auth).expires_at
+  end
+
+  test "login creates token with configured lifetime" do
+    token_lifetime = 1.hour
+    Rails.configuration.Login.TokenLifetime = token_lifetime
+    authorize_with :inactive
+    api_client_page = 'http://client.example.com/home'
+    get :login, params: {return_to: api_client_page}
+    assert_not_nil assigns(:api_client)
+    api_client_auth = assigns(:api_client_auth)
+    assert_in_delta(api_client_auth.expires_at,
+                    api_client_auth.updated_at + token_lifetime,
+                    1.second)
+  end
+
   test "login with remote param returns a salted token" do
     authorize_with :inactive
     api_client_page = 'http://client.example.com/home'
index df082c27fd8c35f7a8d1011bcd3faeba3d4bd4d8..93e4c51abf0e9266a1883ab18bf5634ea45722b0 100644 (file)
@@ -7,25 +7,32 @@ require 'test_helper'
 class ApiClientTest < ActiveSupport::TestCase
   include CurrentApiClient
 
-  test "configured workbench is trusted" do
-    Rails.configuration.Services.Workbench1.ExternalURL = URI("http://wb1.example.com")
-    Rails.configuration.Services.Workbench2.ExternalURL = URI("https://wb2.example.com:443")
+  [true, false].each do |token_lifetime_enabled|
+    test "configured workbench is trusted when token lifetime is#{token_lifetime_enabled ? '': ' not'} enabled" do
+      Rails.configuration.Login.TokenLifetime = token_lifetime_enabled ? 8.hours : 0
+      Rails.configuration.Services.Workbench1.ExternalURL = URI("http://wb1.example.com")
+      Rails.configuration.Services.Workbench2.ExternalURL = URI("https://wb2.example.com:443")
 
-    act_as_system_user do
-      [["http://wb0.example.com", false],
-       ["http://wb1.example.com", true],
-       ["http://wb2.example.com", false],
-       ["https://wb2.example.com", true],
-       ["https://wb2.example.com/", true],
-      ].each do |pfx, result|
-        a = ApiClient.create(url_prefix: pfx, is_trusted: false)
-        assert_equal result, a.is_trusted
-      end
+      act_as_system_user do
+        [["http://wb0.example.com", false],
+        ["http://wb1.example.com", true],
+        ["http://wb2.example.com", false],
+        ["https://wb2.example.com", true],
+        ["https://wb2.example.com/", true],
+        ].each do |pfx, result|
+          a = ApiClient.create(url_prefix: pfx, is_trusted: false)
+          if token_lifetime_enabled
+            assert_equal false, a.is_trusted, "API client with url prefix '#{pfx}' shouldn't be trusted"
+          else
+            assert_equal result, a.is_trusted
+          end
+        end
 
-      a = ApiClient.create(url_prefix: "http://example.com", is_trusted: true)
-      a.save!
-      a.reload
-      assert a.is_trusted
+        a = ApiClient.create(url_prefix: "http://example.com", is_trusted: true)
+        a.save!
+        a.reload
+        assert a.is_trusted
+      end
     end
   end
 end
index c1db8c8b5db1aa48fe4a843fb2a573f0b0966a3f..64f78071350a6736994986eff3267c541e72b4f6 100644 (file)
@@ -295,4 +295,29 @@ class ArvadosModelTest < ActiveSupport::TestCase
     c.reload
     assert_equal({'foo' => 'bar'}, c.properties)
   end
+
+  test 'serialized attributes dirty tracking with audit log settings' do
+    Rails.configuration.AuditLogs.MaxDeleteBatch = 1000
+    set_user_from_auth :admin
+    [false, true].each do |auditlogs_enabled|
+      if auditlogs_enabled
+        Rails.configuration.AuditLogs.MaxAge = 3600
+      else
+        Rails.configuration.AuditLogs.MaxAge = 0
+      end
+      [
+        User.find_by_uuid(users(:active).uuid),
+        ContainerRequest.find_by_uuid(container_requests(:queued).uuid),
+        Container.find_by_uuid(containers(:queued).uuid),
+        PipelineInstance.find_by_uuid(pipeline_instances(:has_component_with_completed_jobs).uuid),
+        PipelineTemplate.find_by_uuid(pipeline_templates(:two_part).uuid),
+        Job.find_by_uuid(jobs(:running).uuid)
+      ].each do |obj|
+        assert_not(obj.class.serialized_attributes.empty?,
+          "#{obj.class} model doesn't have serialized attributes")
+        # obj shouldn't have changed since it's just retrieved from the database
+        assert_not(obj.changed?, "#{obj.class} model's attribute(s) appear as changed: '#{obj.changes.keys.join(',')}' with audit logs #{auditlogs_enabled ? '': 'not '}enabled.")
+      end
+    end
+  end
 end
index a1c8ff8a921d214d2ea27608708c0b3d19caa8f9..016a0e4eb4a9b6a59717de2c75a634b3182dd82f 100644 (file)
@@ -378,19 +378,6 @@ class LogTest < ActiveSupport::TestCase
         sleep 0.1
       end
       assert_operator remaining_audit_logs.count, :<, initial_log_count
-    ensure
-      # The test framework rolls back our transactions, but that
-      # doesn't undo the deletes we did from separate threads.
-      ActiveRecord::Base.connection.exec_query 'ROLLBACK'
-      Thread.new do
-        begin
-          dc = DatabaseController.new
-          dc.define_singleton_method :render do |*args| end
-          dc.reset
-        ensure
-          ActiveRecord::Base.connection.close
-        end
-      end.join
     end
   end
 end
index b54e8d9de64f970726dc49d07ca47e368491986a..9fa3febe1e75fddf4227fae9801cda7d976b4149 100644 (file)
@@ -141,7 +141,7 @@ class NodeTest < ActiveSupport::TestCase
     assert_equal "custom1", node2.hostname
   end
 
-  test "update dns when nodemanager clears hostname and ip_address" do
+  test "update dns when hostname and ip_address are cleared" do
     act_as_system_user do
       node = ping_node(:new_with_custom_hostname, {})
       Node.expects(:dns_server_update).with(node.hostname, Node::UNUSED_NODE_IP)
index 5f3cc608c3a9d360518dda68d47d1cde901cc459..fb0fc0d7830f3582bbe14c27bb45753661770aad 100644 (file)
@@ -54,7 +54,7 @@ func (s *GitoliteSuite) SetUpTest(c *check.C) {
        s.cluster, err = cfg.GetCluster("")
        c.Assert(err, check.Equals, nil)
 
-       s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+       s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: "localhost:0"}: {}}
        s.cluster.TLS.Insecure = true
        s.cluster.Git.GitCommand = "/usr/share/gitolite3/gitolite-shell"
        s.cluster.Git.GitoliteHome = s.gitoliteHome
diff --git a/services/arv-web/README b/services/arv-web/README
deleted file mode 100644 (file)
index eaf7624..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-arv-web enables you to run a custom web service using the contents of an
-Arvados collection.
-
-See "Using arv-web" in the Arvados user guide:
-
-http://doc.arvados.org/user/topics/arv-web.html
diff --git a/services/arv-web/arv-web.py b/services/arv-web/arv-web.py
deleted file mode 100755 (executable)
index 55b710a..0000000
+++ /dev/null
@@ -1,256 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# arv-web enables you to run a custom web service from the contents of an Arvados collection.
-#
-# See http://doc.arvados.org/user/topics/arv-web.html
-
-import arvados
-from arvados.safeapi import ThreadSafeApiCache
-import subprocess
-from arvados_fuse import Operations, CollectionDirectory
-import tempfile
-import os
-import llfuse
-import threading
-import Queue
-import argparse
-import logging
-import signal
-import sys
-import functools
-
-logger = logging.getLogger('arvados.arv-web')
-logger.setLevel(logging.INFO)
-
-class ArvWeb(object):
-    def __init__(self, project, docker_image, port):
-        self.project = project
-        self.loop = True
-        self.cid = None
-        self.prev_docker_image = None
-        self.mountdir = None
-        self.collection = None
-        self.override_docker_image = docker_image
-        self.port = port
-        self.evqueue = Queue.Queue()
-        self.api = ThreadSafeApiCache(arvados.config.settings())
-
-        if arvados.util.group_uuid_pattern.match(project) is None:
-            raise arvados.errors.ArgumentError("Project uuid is not valid")
-
-        collections = self.api.collections().list(filters=[["owner_uuid", "=", project]],
-                        limit=1,
-                        order='modified_at desc').execute()['items']
-        self.newcollection = collections[0]['uuid'] if collections else None
-
-        self.ws = arvados.events.subscribe(self.api, [["object_uuid", "is_a", "arvados#collection"]], self.on_message)
-
-    def check_docker_running(self):
-        # It would be less hacky to use "docker events" than poll "docker ps"
-        # but that would require writing a bigger pile of code.
-        if self.cid:
-            ps = subprocess.check_output(["docker", "ps", "--no-trunc=true", "--filter=status=running"])
-            for l in ps.splitlines():
-                if l.startswith(self.cid):
-                    return True
-        return False
-
-    # Handle messages from Arvados event bus.
-    def on_message(self, ev):
-        if 'event_type' in ev:
-            old_attr = None
-            if 'old_attributes' in ev['properties'] and ev['properties']['old_attributes']:
-                old_attr = ev['properties']['old_attributes']
-            if self.project not in (ev['properties']['new_attributes']['owner_uuid'],
-                                    old_attr['owner_uuid'] if old_attr else None):
-                return
-
-            et = ev['event_type']
-            if ev['event_type'] == 'update':
-                if ev['properties']['new_attributes']['owner_uuid'] != ev['properties']['old_attributes']['owner_uuid']:
-                    if self.project == ev['properties']['new_attributes']['owner_uuid']:
-                        et = 'add'
-                    else:
-                        et = 'remove'
-                if ev['properties']['new_attributes']['trash_at'] is not None:
-                    et = 'remove'
-
-            self.evqueue.put((self.project, et, ev['object_uuid']))
-
-    # Run an arvados_fuse mount under the control of the local process.  This lets
-    # us switch out the contents of the directory without having to unmount and
-    # remount.
-    def run_fuse_mount(self):
-        self.mountdir = tempfile.mkdtemp()
-
-        self.operations = Operations(os.getuid(), os.getgid(), self.api, "utf-8")
-        self.cdir = CollectionDirectory(llfuse.ROOT_INODE, self.operations.inodes, self.api, 2, self.collection)
-        self.operations.inodes.add_entry(self.cdir)
-
-        # Initialize the fuse connection
-        llfuse.init(self.operations, self.mountdir, ['allow_other'])
-
-        t = threading.Thread(None, llfuse.main)
-        t.start()
-
-        # wait until the driver is finished initializing
-        self.operations.initlock.wait()
-
-    def mount_collection(self):
-        if self.newcollection != self.collection:
-            self.collection = self.newcollection
-            if not self.mountdir and self.collection:
-                self.run_fuse_mount()
-
-            if self.mountdir:
-                with llfuse.lock:
-                    self.cdir.clear()
-                    # Switch the FUSE directory object so that it stores
-                    # the newly selected collection
-                    if self.collection:
-                        logger.info("Mounting %s", self.collection)
-                    else:
-                        logger.info("Mount is empty")
-                    self.cdir.change_collection(self.collection)
-
-
-    def stop_docker(self):
-        if self.cid:
-            logger.info("Stopping Docker container")
-            subprocess.call(["docker", "stop", self.cid])
-            self.cid = None
-
-    def run_docker(self):
-        try:
-            if self.collection is None:
-                self.stop_docker()
-                return
-
-            docker_image = None
-            if self.override_docker_image:
-                docker_image = self.override_docker_image
-            else:
-                try:
-                    with llfuse.lock:
-                        if "docker_image" in self.cdir:
-                            docker_image = self.cdir["docker_image"].readfrom(0, 1024).strip()
-                except IOError as e:
-                    pass
-
-            has_reload = False
-            try:
-                with llfuse.lock:
-                    has_reload = "reload" in self.cdir
-            except IOError as e:
-                pass
-
-            if docker_image is None:
-                logger.error("Collection must contain a file 'docker_image' or must specify --image on the command line.")
-                self.stop_docker()
-                return
-
-            if docker_image == self.prev_docker_image and self.cid is not None and has_reload:
-                logger.info("Running container reload command")
-                subprocess.check_call(["docker", "exec", self.cid, "/mnt/reload"])
-                return
-
-            self.stop_docker()
-
-            logger.info("Starting Docker container %s", docker_image)
-            self.cid = subprocess.check_output(["docker", "run",
-                                                "--detach=true",
-                                                "--publish=%i:80" % (self.port),
-                                                "--volume=%s:/mnt:ro" % self.mountdir,
-                                                docker_image]).strip()
-
-            self.prev_docker_image = docker_image
-            logger.info("Container id %s", self.cid)
-
-        except subprocess.CalledProcessError:
-            self.cid = None
-
-    def wait_for_events(self):
-        if not self.cid:
-            logger.warning("No service running!  Will wait for a new collection to appear in the project.")
-        else:
-            logger.info("Waiting for events")
-
-        running = True
-        self.loop = True
-        while running:
-            # Main run loop.  Wait on project events, signals, or the
-            # Docker container stopping.
-
-            try:
-                # Poll the queue with a 1 second timeout, if we have no
-                # timeout the Python runtime doesn't have a chance to
-                # process SIGINT or SIGTERM.
-                eq = self.evqueue.get(True, 1)
-                logger.info("%s %s", eq[1], eq[2])
-                self.newcollection = self.collection
-                if eq[1] in ('add', 'update', 'create'):
-                    self.newcollection = eq[2]
-                elif eq[1] == 'remove':
-                    collections = self.api.collections().list(filters=[["owner_uuid", "=", self.project]],
-                                                        limit=1,
-                                                        order='modified_at desc').execute()['items']
-                    self.newcollection = collections[0]['uuid'] if collections else None
-                running = False
-            except Queue.Empty:
-                pass
-
-            if self.cid and not self.check_docker_running():
-                logger.warning("Service has terminated.  Will try to restart.")
-                self.cid = None
-                running = False
-
-
-    def run(self):
-        try:
-            while self.loop:
-                self.loop = False
-                self.mount_collection()
-                try:
-                    self.run_docker()
-                    self.wait_for_events()
-                except (KeyboardInterrupt):
-                    logger.info("Got keyboard interrupt")
-                    self.ws.close()
-                    self.loop = False
-                except Exception as e:
-                    logger.exception("Caught fatal exception, shutting down")
-                    self.ws.close()
-                    self.loop = False
-        finally:
-            self.stop_docker()
-
-            if self.mountdir:
-                logger.info("Unmounting")
-                subprocess.call(["fusermount", "-u", self.mountdir])
-                os.rmdir(self.mountdir)
-
-
-def main(argv):
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--project-uuid', type=str, required=True, help="Project uuid to watch")
-    parser.add_argument('--port', type=int, default=8080, help="Host port to listen on (default 8080)")
-    parser.add_argument('--image', type=str, help="Docker image to run")
-
-    args = parser.parse_args(argv)
-
-    signal.signal(signal.SIGTERM, lambda signal, frame: sys.exit(0))
-
-    try:
-        arvweb = ArvWeb(args.project_uuid, args.image, args.port)
-        arvweb.run()
-    except arvados.errors.ArgumentError as e:
-        logger.error(e)
-        return 1
-
-    return 0
-
-if __name__ == '__main__':
-    sys.exit(main(sys.argv[1:]))
diff --git a/services/arv-web/sample-cgi-app/docker_image b/services/arv-web/sample-cgi-app/docker_image
deleted file mode 100644 (file)
index 57f344f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-arvados/arv-web
\ No newline at end of file
diff --git a/services/arv-web/sample-cgi-app/public/.htaccess b/services/arv-web/sample-cgi-app/public/.htaccess
deleted file mode 100644 (file)
index e5145bd..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-Options +ExecCGI
-AddHandler cgi-script .cgi
-DirectoryIndex index.cgi
diff --git a/services/arv-web/sample-cgi-app/public/index.cgi b/services/arv-web/sample-cgi-app/public/index.cgi
deleted file mode 100755 (executable)
index 57bc2a9..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/perl
-
-print "Content-type: text/html\n\n";
-print "Hello world from perl!";
diff --git a/services/arv-web/sample-cgi-app/tmp/.keepkeep b/services/arv-web/sample-cgi-app/tmp/.keepkeep
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/services/arv-web/sample-rack-app/config.ru b/services/arv-web/sample-rack-app/config.ru
deleted file mode 100644 (file)
index 65f3c7c..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-app = proc do |env|
-    [200, { "Content-Type" => "text/html" }, ["hello <b>world</b> from ruby"]]
-end
-run app
diff --git a/services/arv-web/sample-rack-app/docker_image b/services/arv-web/sample-rack-app/docker_image
deleted file mode 100644 (file)
index 57f344f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-arvados/arv-web
\ No newline at end of file
diff --git a/services/arv-web/sample-rack-app/public/.keepkeep b/services/arv-web/sample-rack-app/public/.keepkeep
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/services/arv-web/sample-rack-app/tmp/.keepkeep b/services/arv-web/sample-rack-app/tmp/.keepkeep
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/services/arv-web/sample-static-page/docker_image b/services/arv-web/sample-static-page/docker_image
deleted file mode 100644 (file)
index 57f344f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-arvados/arv-web
\ No newline at end of file
diff --git a/services/arv-web/sample-static-page/public/index.html b/services/arv-web/sample-static-page/public/index.html
deleted file mode 100644 (file)
index e8608a5..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-<!-- Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: AGPL-3.0 -->
-
-<html>
-  <head><title>arv-web sample</title></head>
-  <body>
-    <p>Hello world static page</p>
-  </body>
-</html>
diff --git a/services/arv-web/sample-static-page/tmp/.keepkeep b/services/arv-web/sample-static-page/tmp/.keepkeep
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/services/arv-web/sample-wsgi-app/docker_image b/services/arv-web/sample-wsgi-app/docker_image
deleted file mode 100644 (file)
index 57f344f..0000000
+++ /dev/null
@@ -1 +0,0 @@
-arvados/arv-web
\ No newline at end of file
diff --git a/services/arv-web/sample-wsgi-app/passenger_wsgi.py b/services/arv-web/sample-wsgi-app/passenger_wsgi.py
deleted file mode 100644 (file)
index faec3c2..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-def application(environ, start_response):
-    start_response('200 OK', [('Content-Type', 'text/plain')])
-    return [b"hello world from python!\n"]
diff --git a/services/arv-web/sample-wsgi-app/public/.keepkeep b/services/arv-web/sample-wsgi-app/public/.keepkeep
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/services/arv-web/sample-wsgi-app/tmp/.keepkeep b/services/arv-web/sample-wsgi-app/tmp/.keepkeep
deleted file mode 100644 (file)
index e69de29..0000000
index 643ca4f587f51bc9b353ab29b4a82869d96578a8..963948cc6bf53cbc86a2568bbc0286310dda5e4b 100644 (file)
@@ -185,10 +185,6 @@ var (
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        h.setupOnce.Do(h.setup)
 
-       remoteAddr := r.RemoteAddr
-       if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
-               remoteAddr = xff + "," + remoteAddr
-       }
        if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
                r.URL.Scheme = xfp
        }
@@ -227,6 +223,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
        }
 
+       if h.serveS3(w, r) {
+               return
+       }
+
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
        var stripParts int
@@ -509,6 +509,27 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        }
 }
 
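+// getClients gets an ArvadosClient from the pool, attaches the given token
+// and request ID, and derives matching keep and arvados clients. On success
+// the caller must call release() to return the client to the pool.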
+func (h *handler) getClients(reqID, token string) (arv *arvadosclient.ArvadosClient, kc *keepclient.KeepClient, client *arvados.Client, release func(), err error) {
+       arv = h.clientPool.Get()
+       if arv == nil {
+               return nil, nil, nil, nil, h.clientPool.Err()
+       }
+       release = func() { h.clientPool.Put(arv) }
+       arv.ApiToken = token
+       kc, err = keepclient.MakeKeepClient(arv)
+       if err != nil {
+               release()
+               return
+       }
+       kc.RequestID = reqID
+       client = (&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }).WithRequestID(reqID)
+       return
+}
+
 func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []string, credentialsOK, attachment bool) {
        if len(tokens) == 0 {
                w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"")
@@ -519,25 +540,13 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s
                http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed)
                return
        }
-       arv := h.clientPool.Get()
-       if arv == nil {
+       _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), tokens[0])
+       if err != nil {
                http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
                return
        }
-       defer h.clientPool.Put(arv)
-       arv.ApiToken = tokens[0]
+       defer release()
 
-       kc, err := keepclient.MakeKeepClient(arv)
-       if err != nil {
-               http.Error(w, "error setting up keep client: "+err.Error(), http.StatusInternalServerError)
-               return
-       }
-       kc.RequestID = r.Header.Get("X-Request-Id")
-       client := (&arvados.Client{
-               APIHost:   arv.ApiServer,
-               AuthToken: arv.ApiToken,
-               Insecure:  arv.ApiInsecure,
-       }).WithRequestID(r.Header.Get("X-Request-Id"))
        fs := client.SiteFileSystem(kc)
        fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
        f, err := fs.Open(r.URL.Path)
index e4028842f0c6b9390715a93c836846f2d9ba753b..647eab1653294311644bdce91faa367bd0ec1832 100644 (file)
@@ -14,6 +14,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/coreos/go-systemd/daemon"
        "github.com/ghodss/yaml"
+       "github.com/sirupsen/logrus"
        log "github.com/sirupsen/logrus"
 )
 
@@ -111,7 +112,7 @@ func main() {
 
        os.Setenv("ARVADOS_API_HOST", cfg.cluster.Services.Controller.ExternalURL.Host)
        srv := &server{Config: cfg}
-       if err := srv.Start(); err != nil {
+       if err := srv.Start(logrus.StandardLogger()); err != nil {
                log.Fatal(err)
        }
        if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
new file mode 100644 (file)
index 0000000..01bc8b7
--- /dev/null
@@ -0,0 +1,452 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "encoding/xml"
+       "errors"
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "path/filepath"
+       "sort"
+       "strconv"
+       "strings"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/AdRoll/goamz/s3"
+)
+
+const s3MaxKeys = 1000
+
+// serveS3 handles r and returns true if r is a request from an S3
+// client, otherwise it returns false.
+func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
+       var token string
+       if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "AWS ") {
+               split := strings.SplitN(auth[4:], ":", 2)
+               if len(split) < 2 {
+                       w.WriteHeader(http.StatusUnauthorized)
+                       return true
+               }
+               token = split[0]
+       } else if strings.HasPrefix(auth, "AWS4-HMAC-SHA256 ") {
+               for _, cmpt := range strings.Split(auth[17:], ",") {
+                       cmpt = strings.TrimSpace(cmpt)
+                       split := strings.SplitN(cmpt, "=", 2)
+                       if len(split) == 2 && split[0] == "Credential" {
+                               keyandscope := strings.Split(split[1], "/")
+                               if len(keyandscope[0]) > 0 {
+                                       token = keyandscope[0]
+                                       break
+                               }
+                       }
+               }
+               if token == "" {
+                       w.WriteHeader(http.StatusBadRequest)
+                       fmt.Fprintln(w, "invalid V4 signature")
+                       return true
+               }
+       } else {
+               return false
+       }
+
+       _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), token)
+       if err != nil {
+               http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError)
+               return true
+       }
+       defer release()
+
+       fs := client.SiteFileSystem(kc)
+       fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution)
+
+       objectNameGiven := strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1
+
+       switch {
+       case r.Method == http.MethodGet && !objectNameGiven:
+               // Path is "/{uuid}" or "/{uuid}/", has no object name
+               if _, ok := r.URL.Query()["versioning"]; ok {
+                       // GetBucketVersioning
+                       w.Header().Set("Content-Type", "application/xml")
+                       io.WriteString(w, xml.Header)
+                       fmt.Fprintln(w, `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"/>`)
+               } else {
+                       // ListObjects
+                       h.s3list(w, r, fs)
+               }
+               return true
+       case r.Method == http.MethodGet || r.Method == http.MethodHead:
+               fspath := "/by_id" + r.URL.Path
+               fi, err := fs.Stat(fspath)
+               if r.Method == "HEAD" && !objectNameGiven {
+                       // HeadBucket
+                       if err == nil && fi.IsDir() {
+                               w.WriteHeader(http.StatusOK)
+                       } else if os.IsNotExist(err) {
+                               w.WriteHeader(http.StatusNotFound)
+                       } else {
+                               http.Error(w, err.Error(), http.StatusBadGateway)
+                       }
+                       return true
+               }
+               if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Config.cluster.Collections.S3FolderObjects {
+                       w.Header().Set("Content-Type", "application/x-directory")
+                       w.WriteHeader(http.StatusOK)
+                       return true
+               }
+               if os.IsNotExist(err) ||
+                       (err != nil && err.Error() == "not a directory") ||
+                       (fi != nil && fi.IsDir()) {
+                       http.Error(w, "not found", http.StatusNotFound)
+                       return true
+               }
+               // shallow copy r, and change URL path
+               r := *r
+               r.URL.Path = fspath
+               http.FileServer(fs).ServeHTTP(w, &r)
+               return true
+       case r.Method == http.MethodPut:
+               if !objectNameGiven {
+                       http.Error(w, "missing object name in PUT request", http.StatusBadRequest)
+                       return true
+               }
+               fspath := "by_id" + r.URL.Path
+               var objectIsDir bool
+               if strings.HasSuffix(fspath, "/") {
+                       if !h.Config.cluster.Collections.S3FolderObjects {
+                               http.Error(w, "invalid object name: trailing slash", http.StatusBadRequest)
+                               return true
+                       }
+                       n, err := r.Body.Read(make([]byte, 1))
+                       if err != nil && err != io.EOF {
+                               http.Error(w, fmt.Sprintf("error reading request body: %s", err), http.StatusInternalServerError)
+                               return true
+                       } else if n > 0 {
+                               http.Error(w, "cannot create object with trailing '/' char unless content is empty", http.StatusBadRequest)
+                               return true
+                       } else if strings.SplitN(r.Header.Get("Content-Type"), ";", 2)[0] != "application/x-directory" {
+                               http.Error(w, "cannot create object with trailing '/' char unless Content-Type is 'application/x-directory'", http.StatusBadRequest)
+                               return true
+                       }
+                       // Given PUT "foo/bar/", we'll use "foo/bar/."
+                       // in the "ensure parents exist" block below,
+                       // and then we'll be done.
+                       fspath += "."
+                       objectIsDir = true
+               }
+               fi, err := fs.Stat(fspath)
+               if err != nil && err.Error() == "not a directory" {
+                       // requested foo/bar, but foo is a file
+                       http.Error(w, "object name conflicts with existing object", http.StatusBadRequest)
+                       return true
+               }
+               if strings.HasSuffix(r.URL.Path, "/") && err == nil && !fi.IsDir() {
+                       // requested foo/bar/, but foo/bar is a file
+                       http.Error(w, "object name conflicts with existing object", http.StatusBadRequest)
+                       return true
+               }
+               // create missing parent/intermediate directories, if any
+               for i, c := range fspath {
+                       if i > 0 && c == '/' {
+                               dir := fspath[:i]
+                               if strings.HasSuffix(dir, "/") {
+                                       err = errors.New("invalid object name (consecutive '/' chars)")
+                                       http.Error(w, err.Error(), http.StatusBadRequest)
+                                       return true
+                               }
+                               err = fs.Mkdir(dir, 0755)
+                               if err == arvados.ErrInvalidArgument {
+                                       // Cannot create a directory
+                                       // here.
+                                       err = fmt.Errorf("mkdir %q failed: %w", dir, err)
+                                       http.Error(w, err.Error(), http.StatusBadRequest)
+                                       return true
+                               } else if err != nil && !os.IsExist(err) {
+                                       err = fmt.Errorf("mkdir %q failed: %w", dir, err)
+                                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                                       return true
+                               }
+                       }
+               }
+               if !objectIsDir {
+                       f, err := fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+                       if os.IsNotExist(err) {
+                               f, err = fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644)
+                       }
+                       if err != nil {
+                               err = fmt.Errorf("open %q failed: %w", r.URL.Path, err)
+                               http.Error(w, err.Error(), http.StatusBadRequest)
+                               return true
+                       }
+                       defer f.Close()
+                       _, err = io.Copy(f, r.Body)
+                       if err != nil {
+                               err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err)
+                               http.Error(w, err.Error(), http.StatusBadGateway)
+                               return true
+                       }
+                       err = f.Close()
+                       if err != nil {
+                               err = fmt.Errorf("write to %q failed: close: %w", r.URL.Path, err)
+                               http.Error(w, err.Error(), http.StatusBadGateway)
+                               return true
+                       }
+               }
+               err = fs.Sync()
+               if err != nil {
+                       err = fmt.Errorf("sync failed: %w", err)
+                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                       return true
+               }
+               w.WriteHeader(http.StatusOK)
+               return true
+       case r.Method == http.MethodDelete:
+               if !objectNameGiven || r.URL.Path == "/" {
+                       http.Error(w, "missing object name in DELETE request", http.StatusBadRequest)
+                       return true
+               }
+               fspath := "by_id" + r.URL.Path
+               if strings.HasSuffix(fspath, "/") {
+                       fspath = strings.TrimSuffix(fspath, "/")
+                       fi, err := fs.Stat(fspath)
+                       if os.IsNotExist(err) {
+                               w.WriteHeader(http.StatusNoContent)
+                               return true
+                       } else if err != nil {
+                               http.Error(w, err.Error(), http.StatusInternalServerError)
+                               return true
+                       } else if !fi.IsDir() {
+                               // if "foo" exists and is a file, then
+                               // "foo/" doesn't exist, so we say
+                               // delete was successful.
+                               w.WriteHeader(http.StatusNoContent)
+                               return true
+                       }
+               } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() {
+                       // if "foo" is a dir, it is visible via S3
+                       // only as "foo/", not "foo" -- so we leave
+                       // the dir alone and return 204 to indicate
+                       // that "foo" does not exist.
+                       w.WriteHeader(http.StatusNoContent)
+                       return true
+               }
+               err = fs.Remove(fspath)
+               if os.IsNotExist(err) {
+                       w.WriteHeader(http.StatusNoContent)
+                       return true
+               }
+               if err != nil {
+                       err = fmt.Errorf("rm failed: %w", err)
+                       http.Error(w, err.Error(), http.StatusBadRequest)
+                       return true
+               }
+               err = fs.Sync()
+               if err != nil {
+                       err = fmt.Errorf("sync failed: %w", err)
+                       http.Error(w, err.Error(), http.StatusInternalServerError)
+                       return true
+               }
+               w.WriteHeader(http.StatusNoContent)
+               return true
+       default:
+               http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+               return true
+       }
+}
+
+// Call fn on the given path (directory) and its contents, in
+// lexicographic order.
+//
+// If isRoot==true and path is not a directory, return nil.
+//
+// If fn returns filepath.SkipDir when called on a directory, don't
+// descend into that directory.
+func walkFS(fs arvados.CustomFileSystem, path string, isRoot bool, fn func(path string, fi os.FileInfo) error) error {
+       if isRoot {
+               fi, err := fs.Stat(path)
+               if os.IsNotExist(err) || (err == nil && !fi.IsDir()) {
+                       return nil
+               } else if err != nil {
+                       return err
+               }
+               err = fn(path, fi)
+               if err == filepath.SkipDir {
+                       return nil
+               } else if err != nil {
+                       return err
+               }
+       }
+       f, err := fs.Open(path)
+       if os.IsNotExist(err) && isRoot {
+               return nil
+       } else if err != nil {
+               return fmt.Errorf("open %q: %w", path, err)
+       }
+       defer f.Close()
+       if path == "/" {
+               path = ""
+       }
+       fis, err := f.Readdir(-1)
+       if err != nil {
+               return err
+       }
+       sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() })
+       for _, fi := range fis {
+               err = fn(path+"/"+fi.Name(), fi)
+               if err == filepath.SkipDir {
+                       continue
+               } else if err != nil {
+                       return err
+               }
+               if fi.IsDir() {
+                       err = walkFS(fs, path+"/"+fi.Name(), false, fn)
+                       if err != nil {
+                               return err
+                       }
+               }
+       }
+       return nil
+}
+
+var errDone = errors.New("done")
+
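+// s3list handles S3 ListObjects requests: it walks the bucket's file tree
+// in lexicographic order, applies the prefix, delimiter, marker, and
+// max-keys parameters, and writes the result as an XML ListBucketResult.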
+func (h *handler) s3list(w http.ResponseWriter, r *http.Request, fs arvados.CustomFileSystem) {
+       var params struct {
+               bucket    string
+               delimiter string
+               marker    string
+               maxKeys   int
+               prefix    string
+       }
+       params.bucket = strings.SplitN(r.URL.Path[1:], "/", 2)[0]
+       params.delimiter = r.FormValue("delimiter")
+       params.marker = r.FormValue("marker")
+       if mk, _ := strconv.ParseInt(r.FormValue("max-keys"), 10, 64); mk > 0 && mk < s3MaxKeys {
+               params.maxKeys = int(mk)
+       } else {
+               params.maxKeys = s3MaxKeys
+       }
+       params.prefix = r.FormValue("prefix")
+
+       bucketdir := "by_id/" + params.bucket
+       // walkpath is the directory (relative to bucketdir) we need
+       // to walk: the innermost directory that is guaranteed to
+       // contain all paths that have the requested prefix. Examples:
+       // prefix "foo/bar"  => walkpath "foo"
+       // prefix "foo/bar/" => walkpath "foo/bar"
+       // prefix "foo"      => walkpath ""
+       // prefix ""         => walkpath ""
+       walkpath := params.prefix
+       if cut := strings.LastIndex(walkpath, "/"); cut >= 0 {
+               walkpath = walkpath[:cut]
+       } else {
+               walkpath = ""
+       }
+
+       resp := s3.ListResp{
+       Name:      params.bucket,
+               Prefix:    params.prefix,
+               Delimiter: params.delimiter,
+               Marker:    params.marker,
+               MaxKeys:   params.maxKeys,
+       }
+       commonPrefixes := map[string]bool{}
+       err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
+               if path == bucketdir {
+                       return nil
+               }
+               path = path[len(bucketdir)+1:]
+               filesize := fi.Size()
+               if fi.IsDir() {
+                       path += "/"
+                       filesize = 0
+               }
+               if len(path) <= len(params.prefix) {
+                       if path > params.prefix[:len(path)] {
+                               // with prefix "foobar", walking "fooz" means we're done
+                               return errDone
+                       }
+                       if path < params.prefix[:len(path)] {
+                               // with prefix "foobar", walking "foobag" is pointless
+                               return filepath.SkipDir
+                       }
+                       if fi.IsDir() && !strings.HasPrefix(params.prefix+"/", path) {
+                               // with prefix "foo/bar", walking "fo"
+                               // is pointless (but walking "foo" or
+                               // "foo/bar" is necessary)
+                               return filepath.SkipDir
+                       }
+                       if len(path) < len(params.prefix) {
+                               // can't skip anything, and this entry
+                               // isn't in the results, so just
+                               // continue descent
+                               return nil
+                       }
+               } else {
+                       if path[:len(params.prefix)] > params.prefix {
+                               // with prefix "foobar", nothing we
+                               // see after "foozzz" is relevant
+                               return errDone
+                       }
+               }
+               if path < params.marker || path < params.prefix {
+                       return nil
+               }
+               if fi.IsDir() && !h.Config.cluster.Collections.S3FolderObjects {
+                       // Note we don't add anything to
+                       // commonPrefixes here even if delimiter is
+                       // "/". We descend into the directory, and
+                       // return a commonPrefix only if we end up
+                       // finding a regular file inside it.
+                       return nil
+               }
+               if params.delimiter != "" {
+                       idx := strings.Index(path[len(params.prefix):], params.delimiter)
+                       if idx >= 0 {
+                               // with prefix "foobar" and delimiter
+                               // "z", when we hit "foobar/baz", we
+                               // add "/baz" to commonPrefixes and
+                               // stop descending.
+                               commonPrefixes[path[:len(params.prefix)+idx+1]] = true
+                               return filepath.SkipDir
+                       }
+               }
+               if len(resp.Contents)+len(commonPrefixes) >= params.maxKeys {
+                       resp.IsTruncated = true
+                       if params.delimiter != "" {
+                               resp.NextMarker = path
+                       }
+                       return errDone
+               }
+               resp.Contents = append(resp.Contents, s3.Key{
+                       Key:          path,
+                       LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
+                       Size:         filesize,
+               })
+               return nil
+       })
+       if err != nil && err != errDone {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
+       if params.delimiter != "" {
+               for prefix := range commonPrefixes {
+                       resp.CommonPrefixes = append(resp.CommonPrefixes, prefix)
+               }
+               sort.Strings(resp.CommonPrefixes)
+       }
+       wrappedResp := struct {
+               XMLName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
+               s3.ListResp
+       }{"", resp}
+       w.Header().Set("Content-Type", "application/xml")
+       io.WriteString(w, xml.Header)
+       if err := xml.NewEncoder(w).Encode(wrappedResp); err != nil {
+               ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response")
+       }
+}
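
The anonymous wrapper struct at the end of the handler is what stamps the S3 namespace onto the response element. A minimal, self-contained sketch of the same encoding/xml technique (the bucket name and field values are placeholders):

        package main

        import (
                "encoding/xml"
                "os"
        )

        type ListResp struct {
                Name        string
                Prefix      string
                IsTruncated bool
        }

        func main() {
                // The tag on the XMLName field supplies both the namespace URL
                // and the element name, so the encoder emits
                // <ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">.
                wrapped := struct {
                        XMLName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
                        ListResp
                }{"", ListResp{Name: "examplebucket", Prefix: "dir0/"}}
                os.Stdout.WriteString(xml.Header)
                if err := xml.NewEncoder(os.Stdout).Encode(wrapped); err != nil {
                        panic(err)
                }
        }
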
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
new file mode 100644 (file)
index 0000000..b82f1ef
--- /dev/null
@@ -0,0 +1,584 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "bytes"
+       "crypto/rand"
+       "fmt"
+       "io/ioutil"
+       "net/http"
+       "os"
+       "strings"
+       "sync"
+       "time"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/keepclient"
+       "github.com/AdRoll/goamz/aws"
+       "github.com/AdRoll/goamz/s3"
+       check "gopkg.in/check.v1"
+)
+
+type s3stage struct {
+       arv        *arvados.Client
+       ac         *arvadosclient.ArvadosClient
+       kc         *keepclient.KeepClient
+       proj       arvados.Group
+       projbucket *s3.Bucket
+       coll       arvados.Collection
+       collbucket *s3.Bucket
+}
+
+func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
+       var proj arvados.Group
+       var coll arvados.Collection
+       arv := arvados.NewClientFromEnv()
+       arv.AuthToken = arvadostest.ActiveToken
+       err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+               "group": map[string]interface{}{
+                       "group_class": "project",
+                       "name":        "keep-web s3 test",
+               },
+               "ensure_unique_name": true,
+       })
+       c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
+               "owner_uuid":    proj.UUID,
+               "name":          "keep-web s3 test collection",
+               "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+       }})
+       c.Assert(err, check.IsNil)
+       ac, err := arvadosclient.New(arv)
+       c.Assert(err, check.IsNil)
+       kc, err := keepclient.MakeKeepClient(ac)
+       c.Assert(err, check.IsNil)
+       fs, err := coll.FileSystem(arv, kc)
+       c.Assert(err, check.IsNil)
+       f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644)
+       c.Assert(err, check.IsNil)
+       _, err = f.Write([]byte("⛵\n"))
+       c.Assert(err, check.IsNil)
+       err = f.Close()
+       c.Assert(err, check.IsNil)
+       err = fs.Sync()
+       c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
+       c.Assert(err, check.IsNil)
+
+       auth := aws.NewAuth(arvadostest.ActiveTokenV2, arvadostest.ActiveTokenV2, "", time.Now().Add(time.Hour))
+       region := aws.Region{
+               Name:       s.testServer.Addr,
+               S3Endpoint: "http://" + s.testServer.Addr,
+       }
+       client := s3.New(*auth, region)
+       return s3stage{
+               arv:  arv,
+               ac:   ac,
+               kc:   kc,
+               proj: proj,
+               projbucket: &s3.Bucket{
+                       S3:   client,
+                       Name: proj.UUID,
+               },
+               coll: coll,
+               collbucket: &s3.Bucket{
+                       S3:   client,
+                       Name: coll.UUID,
+               },
+       }
+}
+
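
s3setup above doubles as a recipe for pointing any path-style S3 client at keep-web: an Arvados token serves as both access key and secret key, and the bucket name is a collection or project UUID. A standalone sketch with placeholder address, token, and UUID (none of these values are real):

        package main

        import (
                "fmt"
                "time"

                "github.com/AdRoll/goamz/aws"
                "github.com/AdRoll/goamz/s3"
        )

        func main() {
                // Placeholders: substitute your keep-web address, Arvados v2
                // token, and a collection (or project) UUID.
                addr := "keep-web.example:443"
                token := "v2/zzzzz-gj3su-aaaaaaaaaaaaaaa/secret"
                auth := aws.NewAuth(token, token, "", time.Now().Add(time.Hour))
                region := aws.Region{Name: addr, S3Endpoint: "https://" + addr}
                bucket := &s3.Bucket{S3: s3.New(*auth, region), Name: "zzzzz-4zz18-aaaaaaaaaaaaaaa"}
                resp, err := bucket.List("", "/", "", 1000)
                if err != nil {
                        panic(err)
                }
                fmt.Println(len(resp.Contents), "keys;", resp.CommonPrefixes)
        }
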
+func (stage s3stage) teardown(c *check.C) {
+       if stage.coll.UUID != "" {
+               err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil)
+               c.Check(err, check.IsNil)
+       }
+       if stage.proj.UUID != "" {
+               err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil)
+               c.Check(err, check.IsNil)
+       }
+}
+
+func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
+               c.Logf("bucket %s", bucket.Name)
+               exists, err := bucket.Exists("")
+               c.Check(err, check.IsNil)
+               c.Check(exists, check.Equals, true)
+       }
+}
+
+func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3GetObject(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) {
+       rdr, err := bucket.GetReader(prefix + "emptyfile")
+       c.Assert(err, check.IsNil)
+       buf, err := ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       c.Check(len(buf), check.Equals, 0)
+       err = rdr.Close()
+       c.Check(err, check.IsNil)
+
+       // GetObject
+       _, err = bucket.GetReader(prefix + "missingfile")
+       c.Check(err, check.ErrorMatches, `404 Not Found`)
+
+       // HeadObject
+       exists, err := bucket.Exists(prefix + "missingfile")
+       c.Check(err, check.IsNil)
+       c.Check(exists, check.Equals, false)
+
+       // GetObject
+       rdr, err = bucket.GetReader(prefix + "sailboat.txt")
+       c.Assert(err, check.IsNil)
+       buf, err = ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       c.Check(buf, check.DeepEquals, []byte("⛵\n"))
+       err = rdr.Close()
+       c.Check(err, check.IsNil)
+
+       // HeadObject
+       exists, err = bucket.Exists(prefix + "sailboat.txt")
+       c.Check(err, check.IsNil)
+       c.Check(exists, check.Equals, true)
+}
+
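
The checks above pin down how a missing key surfaces on each verb: GetReader (GET) returns a "404 Not Found" error, while Exists (HEAD) returns (false, nil). A hypothetical helper built on that contract:

        package example

        import (
                "io"

                "github.com/AdRoll/goamz/s3"
        )

        // headBeforeGet returns a reader for key, or (nil, nil) if the key
        // does not exist; transport and server errors are returned as-is.
        func headBeforeGet(bucket *s3.Bucket, key string) (io.ReadCloser, error) {
                ok, err := bucket.Exists(key)
                if err != nil || !ok {
                        return nil, err // err == nil here means "not found"
                }
                return bucket.GetReader(key)
        }
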
+func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectSuccess(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) {
+       for _, trial := range []struct {
+               path        string
+               size        int
+               contentType string
+       }{
+               {
+                       path:        "newfile",
+                       size:        128000000,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir/newfile",
+                       size:        1 << 26,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir1/newdir2/newfile",
+                       size:        0,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir1/newdir2/newdir3/",
+                       size:        0,
+                       contentType: "application/x-directory",
+               },
+       } {
+               c.Logf("=== %v", trial)
+
+               objname := prefix + trial.path
+
+               _, err := bucket.GetReader(objname)
+               c.Assert(err, check.ErrorMatches, `404 Not Found`)
+
+               buf := make([]byte, trial.size)
+               rand.Read(buf)
+
+               err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
+               c.Check(err, check.IsNil)
+
+               rdr, err := bucket.GetReader(objname)
+               if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects {
+                       c.Check(err, check.NotNil)
+                       continue
+               } else if !c.Check(err, check.IsNil) {
+                       continue
+               }
+               buf2, err := ioutil.ReadAll(rdr)
+               c.Check(err, check.IsNil)
+               c.Check(buf2, check.HasLen, len(buf))
+               c.Check(bytes.Equal(buf, buf2), check.Equals, true)
+       }
+}
+
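
The last trial above exercises folder objects: when Collections.S3FolderObjects is enabled, a zero-length PUT whose key ends in "/" and whose Content-Type is application/x-directory creates an empty directory. Sketched as a hypothetical helper:

        package example

        import (
                "bytes"

                "github.com/AdRoll/goamz/s3"
        )

        // mkFolder creates an empty "directory" object; path must end in "/",
        // and the cluster must have Collections.S3FolderObjects enabled.
        func mkFolder(bucket *s3.Bucket, path string) error {
                return bucket.PutReader(path, bytes.NewReader(nil), 0,
                        "application/x-directory", s3.Private, s3.Options{})
        }
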
+func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       bucket := stage.projbucket
+
+       for _, trial := range []struct {
+               path        string
+               size        int
+               contentType string
+       }{
+               {
+                       path:        "newfile",
+                       size:        1234,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir/newfile",
+                       size:        1234,
+                       contentType: "application/octet-stream",
+               }, {
+                       path:        "newdir2/",
+                       size:        0,
+                       contentType: "application/x-directory",
+               },
+       } {
+               c.Logf("=== %v", trial)
+
+               _, err := bucket.GetReader(trial.path)
+               c.Assert(err, check.ErrorMatches, `404 Not Found`)
+
+               buf := make([]byte, trial.size)
+               rand.Read(buf)
+
+               err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{})
+               c.Check(err, check.ErrorMatches, `400 Bad Request`)
+
+               _, err = bucket.GetReader(trial.path)
+               c.Assert(err, check.ErrorMatches, `404 Not Found`)
+       }
+}
+
+func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3DeleteObject(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
+       s.testServer.Config.cluster.Collections.S3FolderObjects = true
+       for _, trial := range []struct {
+               path string
+       }{
+               {"/"},
+               {"nonexistentfile"},
+               {"emptyfile"},
+               {"sailboat.txt"},
+               {"sailboat.txt/"},
+               {"emptydir"},
+               {"emptydir/"},
+       } {
+               objname := prefix + trial.path
+               comment := check.Commentf("objname %q", objname)
+
+               err := bucket.Del(objname)
+               if trial.path == "/" {
+                       c.Check(err, check.NotNil)
+                       continue
+               }
+               c.Check(err, check.IsNil, comment)
+               _, err = bucket.GetReader(objname)
+               c.Check(err, check.NotNil, comment)
+       }
+}
+
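
One behavior the table above pins down: as with real S3, deleting a key that does not exist succeeds (only the bare "/" path is rejected). Deletion can therefore be treated as idempotent, as in this hypothetical helper:

        package example

        import "github.com/AdRoll/goamz/s3"

        // removeIfPresent relies on the semantics asserted above: DELETE of a
        // missing key returns success, so no existence check is needed first.
        func removeIfPresent(bucket *s3.Bucket, key string) error {
                return bucket.Del(key)
        }
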
+func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectFailure(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) {
+       s.testServer.Config.cluster.Collections.S3FolderObjects = false
+       var wg sync.WaitGroup
+       for _, trial := range []struct {
+               path string
+       }{
+               {
+                       path: "emptyfile/newname", // emptyfile exists, see s3setup()
+               }, {
+                       path: "emptyfile/", // emptyfile exists, see s3setup()
+               }, {
+                       path: "emptydir", // dir already exists, see s3setup()
+               }, {
+                       path: "emptydir/",
+               }, {
+                       path: "emptydir//",
+               }, {
+                       path: "newdir/",
+               }, {
+                       path: "newdir//",
+               }, {
+                       path: "/",
+               }, {
+                       path: "//",
+               }, {
+                       path: "foo//bar",
+               }, {
+                       path: "",
+               },
+       } {
+               trial := trial
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       c.Logf("=== %v", trial)
+
+                       objname := prefix + trial.path
+
+                       buf := make([]byte, 1234)
+                       rand.Read(buf)
+
+                       err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{})
+                       if !c.Check(err, check.ErrorMatches, `400 Bad.*`, check.Commentf("PUT %q should fail", objname)) {
+                               return
+                       }
+
+                       if objname != "" && objname != "/" {
+                               _, err = bucket.GetReader(objname)
+                               c.Check(err, check.ErrorMatches, `404 Not Found`, check.Commentf("GET %q should return 404", objname))
+                       }
+               }()
+       }
+       wg.Wait()
+}
+
+func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) {
+       fs, err := stage.coll.FileSystem(stage.arv, stage.kc)
+       c.Assert(err, check.IsNil)
+       for d := 0; d < dirs; d++ {
+               dir := fmt.Sprintf("dir%d", d)
+               c.Assert(fs.Mkdir(dir, 0755), check.IsNil)
+               for i := 0; i < filesPerDir; i++ {
+                       f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644)
+                       c.Assert(err, check.IsNil)
+                       c.Assert(f.Close(), check.IsNil)
+               }
+       }
+       c.Assert(fs.Sync(), check.IsNil)
+}
+
+func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+       for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
+               req, err := http.NewRequest("GET", bucket.URL("/"), nil)
+               c.Check(err, check.IsNil)
+               req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
+               req.URL.RawQuery = "versioning"
+               resp, err := http.DefaultClient.Do(req)
+               c.Assert(err, check.IsNil)
+               c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml")
+               buf, err := ioutil.ReadAll(resp.Body)
+               c.Assert(err, check.IsNil)
+               c.Check(string(buf), check.Equals, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<VersioningConfiguration xmlns=\"http://s3.amazonaws.com/doc/2006-03-01/\"/>\n")
+       }
+}
+
+func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       var markers int
+       for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
+               dirs := 2
+               filesPerDir := 1001
+               stage.writeBigDirs(c, dirs, filesPerDir)
+               // Total # objects is:
+               //                 2 file entries from s3setup (emptyfile and sailboat.txt)
+               //                +1 fake "directory" marker from s3setup (emptydir) (if enabled)
+               //             +dirs fake "directory" markers from writeBigDirs (dir0/, dir1/) (if enabled)
+               // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
+               s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
+               s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
+               s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
+       }
+}
+func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) {
+       c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects)
+       expectPageSize := pageSize
+       if expectPageSize > 1000 {
+               expectPageSize = 1000
+       }
+       gotKeys := map[string]s3.Key{}
+       nextMarker := ""
+       pages := 0
+       for {
+               resp, err := bucket.List(prefix, "", nextMarker, pageSize)
+               if !c.Check(err, check.IsNil) {
+                       break
+               }
+               c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true)
+               if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) {
+                       break
+               }
+               for _, key := range resp.Contents {
+                       gotKeys[key.Key] = key
+                       if strings.Contains(key.Key, "sailboat.txt") {
+                               c.Check(key.Size, check.Equals, int64(4))
+                       }
+               }
+               if !resp.IsTruncated {
+                       c.Check(resp.NextMarker, check.Equals, "")
+                       break
+               }
+               if !c.Check(resp.NextMarker, check.Not(check.Equals), "") {
+                       break
+               }
+               nextMarker = resp.NextMarker
+       }
+       c.Check(len(gotKeys), check.Equals, expectFiles)
+}
+
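
testS3List drives the pagination contract from the client side; the same loop, extracted as a hypothetical helper (note that goamz itself fills in a missing NextMarker from the last key whenever IsTruncated is set, as the comment in testS3CollectionListRollup below explains):

        package example

        import "github.com/AdRoll/goamz/s3"

        // listAll pages through a bucket the way testS3List does: follow
        // NextMarker while IsTruncated is set, and stop at the first
        // un-truncated page.
        func listAll(bucket *s3.Bucket, prefix string) ([]s3.Key, error) {
                var keys []s3.Key
                for marker := ""; ; {
                        resp, err := bucket.List(prefix, "", marker, 1000)
                        if err != nil {
                                return keys, err
                        }
                        keys = append(keys, resp.Contents...)
                        if !resp.IsTruncated {
                                return keys, nil
                        }
                        marker = resp.NextMarker
                }
        }
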
+func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
+       for _, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} {
+               s.testS3CollectionListRollup(c)
+       }
+}
+
+func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       dirs := 2
+       filesPerDir := 500
+       stage.writeBigDirs(c, dirs, filesPerDir)
+       err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{})
+       c.Assert(err, check.IsNil)
+       var allfiles []string
+       for marker := ""; ; {
+               resp, err := stage.collbucket.List("", "", marker, 20000)
+               c.Check(err, check.IsNil)
+               for _, key := range resp.Contents {
+                       if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key {
+                               allfiles = append(allfiles, key.Key)
+                       }
+               }
+               marker = resp.NextMarker
+               if marker == "" {
+                       break
+               }
+       }
+       markers := 0
+       if s.testServer.Config.cluster.Collections.S3FolderObjects {
+               markers = 1
+       }
+       c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers)
+
+       gotDirMarker := map[string]bool{}
+       for _, name := range allfiles {
+               isDirMarker := strings.HasSuffix(name, "/")
+               if markers == 0 {
+                       c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name))
+               } else if isDirMarker {
+                       gotDirMarker[name] = true
+               } else if i := strings.LastIndex(name, "/"); i >= 0 {
+                       c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name))
+                       gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker
+               }
+       }
+
+       for _, trial := range []struct {
+               prefix    string
+               delimiter string
+               marker    string
+       }{
+               {"", "", ""},
+               {"di", "/", ""},
+               {"di", "r", ""},
+               {"di", "n", ""},
+               {"dir0", "/", ""},
+               {"dir0/", "/", ""},
+               {"dir0/f", "/", ""},
+               {"dir0", "", ""},
+               {"dir0/", "", ""},
+               {"dir0/f", "", ""},
+               {"dir0", "/", "dir0/file14.txt"},       // no commonprefixes
+               {"", "", "dir0/file14.txt"},            // middle page, skip walking dir1
+               {"", "", "dir1/file14.txt"},            // middle page, skip walking dir0
+               {"", "", "dir1/file498.txt"},           // last page of results
+               {"dir1/file", "", "dir1/file498.txt"},  // last page of results, with prefix
+               {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + delimiter
+               {"dir1", "Z", "dir1/file498.txt"},      // delimiter "Z" never appears
+               {"dir2", "/", ""},                      // prefix "dir2" does not exist
+               {"", "/", ""},
+       } {
+               c.Logf("\n\n=== trial %+v markers=%d", trial, markers)
+
+               maxKeys := 20
+               resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys)
+               c.Check(err, check.IsNil)
+               if resp.IsTruncated && trial.delimiter == "" {
+                       // goamz List method fills in the missing
+                       // NextMarker field if resp.IsTruncated, so
+                       // now we can't really tell whether it was
+                       // sent by the server or by goamz. In cases
+                       // where it should be empty but isn't, assume
+                       // it's goamz's fault.
+                       resp.NextMarker = ""
+               }
+
+               var expectKeys []string
+               var expectPrefixes []string
+               var expectNextMarker string
+               var expectTruncated bool
+               for _, key := range allfiles {
+                       full := len(expectKeys)+len(expectPrefixes) >= maxKeys
+                       if !strings.HasPrefix(key, trial.prefix) || key < trial.marker {
+                               continue
+                       } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
+                               prefix := key[:len(trial.prefix)+idx+1]
+                               if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
+                                       // same prefix as previous key
+                               } else if full {
+                                       expectNextMarker = key
+                                       expectTruncated = true
+                               } else {
+                                       expectPrefixes = append(expectPrefixes, prefix)
+                               }
+                       } else if full {
+                               if trial.delimiter != "" {
+                                       expectNextMarker = key
+                               }
+                               expectTruncated = true
+                               break
+                       } else {
+                               expectKeys = append(expectKeys, key)
+                       }
+               }
+
+               var gotKeys []string
+               for _, key := range resp.Contents {
+                       gotKeys = append(gotKeys, key.Key)
+               }
+               var gotPrefixes []string
+               for _, prefix := range resp.CommonPrefixes {
+                       gotPrefixes = append(gotPrefixes, prefix)
+               }
+               commentf := check.Commentf("trial %+v markers=%d", trial, markers)
+               c.Check(gotKeys, check.DeepEquals, expectKeys, commentf)
+               c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf)
+               c.Check(resp.NextMarker, check.Equals, expectNextMarker, commentf)
+               c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf)
+               c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker)
+       }
+}
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
index 46dc3d30179343e29edea208588103fe947c3c08..8f623c627d067f843f2746a2b8b64248006b1a18 100644 (file)
@@ -20,12 +20,12 @@ type server struct {
        Config *Config
 }
 
-func (srv *server) Start() error {
+func (srv *server) Start(logger *logrus.Logger) error {
        h := &handler{Config: srv.Config}
        reg := prometheus.NewRegistry()
        h.Config.Cache.registry = reg
-       ctx := ctxlog.Context(context.Background(), logrus.StandardLogger())
-       mh := httpserver.Instrument(reg, nil, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h))))
+       ctx := ctxlog.Context(context.Background(), logger)
+       mh := httpserver.Instrument(reg, logger, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h))))
        h.MetricsAPI = mh.ServeAPI(h.Config.cluster.ManagementToken, http.NotFoundHandler())
        srv.Handler = mh
        var listen arvados.URL
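
With the logger now injected by the caller, production and test code can wire up different loggers without touching the server itself. A compile-only sketch (the starter interface is illustrative, not part of this patch):

        package example

        import "github.com/sirupsen/logrus"

        // starter stands in for *server, whose Start method now takes its
        // logger from the caller.
        type starter interface {
                Start(*logrus.Logger) error
        }

        func runProduction(srv starter) error {
                // Tests can pass ctxlog.TestLogger(c) instead, as SetUpTest
                // does below.
                return srv.Start(logrus.StandardLogger())
        }
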
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
index bca7ff49fa0820c8affd818dd6c8b26ceca81755..c37852a128bbaa9571ebf1527f3f9f6b6cee41ae 100644 (file)
@@ -442,7 +442,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) {
        cfg.cluster.ManagementToken = arvadostest.ManagementToken
        cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
        s.testServer = &server{Config: cfg}
-       err = s.testServer.Start()
+       err = s.testServer.Start(ctxlog.TestLogger(c))
        c.Assert(err, check.Equals, nil)
 }
 
diff --git a/services/nodemanager/.gitignore b/services/nodemanager/.gitignore
deleted file mode 120000 (symlink)
index ed3b362..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../sdk/python/.gitignore
\ No newline at end of file
diff --git a/services/nodemanager/MANIFEST.in b/services/nodemanager/MANIFEST.in
deleted file mode 100644 (file)
index 8410420..0000000
+++ /dev/null
@@ -1,8 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-include agpl-3.0.txt
-include README.rst
-include arvados_version.py
-include arvados-node-manager.service
diff --git a/services/nodemanager/README.rst b/services/nodemanager/README.rst
deleted file mode 100644 (file)
index 1d725e0..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-.. Copyright (C) The Arvados Authors. All rights reserved.
-..
-.. SPDX-License-Identifier: AGPL-3.0
-
-====================
-Arvados Node Manager
-====================
-
-Overview
---------
-
-This package provides ``arvados-node-manager``.  It dynamically starts
-and stops compute nodes on an Arvados_ cloud installation based on job
-demand.
-
-.. _Arvados: https://arvados.org/
-
-Setup
------
-
-1. Install the package.
-
-2. Write a configuration file.  ``doc/ec2.example.cfg`` documents all
-   of the options available, with specific tunables for EC2 clouds.
-
-3. Run ``arvados-node-manager --config YOURCONFIGFILE`` using whatever
-   supervisor you like (e.g., runit).
-
-Testing and Development
------------------------
-
-To run tests, just run::
-
-  python setup.py test
-
-Our `hacking guide
-<https://arvados.org/projects/arvados/wiki/Hacking_Node_Manager>`_
-provides an architectural overview of the Arvados Node Manager to help
-you find your way around the source.  The `Lifecycle of an Arvados
-compute node
-<https://arvados.org/projects/arvados/wiki/Lifecycle_of_an_Arvados_compute_node>`_
-page explains how it works in concert with other Arvados components to
-prepare a node for compute work.
diff --git a/services/nodemanager/agpl-3.0.txt b/services/nodemanager/agpl-3.0.txt
deleted file mode 100644 (file)
index dba13ed..0000000
+++ /dev/null
@@ -1,661 +0,0 @@
-                    GNU AFFERO GENERAL PUBLIC LICENSE
-                       Version 3, 19 November 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-                            Preamble
-
-  The GNU Affero General Public License is a free, copyleft license for
-software and other kinds of works, specifically designed to ensure
-cooperation with the community in the case of network server software.
-
-  The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works.  By contrast,
-our General Public Licenses are intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users.
-
-  When we speak of free software, we are referring to freedom, not
-price.  Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
-  Developers that use our General Public Licenses protect your rights
-with two steps: (1) assert copyright on the software, and (2) offer
-you this License which gives you legal permission to copy, distribute
-and/or modify the software.
-
-  A secondary benefit of defending all users' freedom is that
-improvements made in alternate versions of the program, if they
-receive widespread use, become available for other developers to
-incorporate.  Many developers of free software are heartened and
-encouraged by the resulting cooperation.  However, in the case of
-software used on network servers, this result may fail to come about.
-The GNU General Public License permits making a modified version and
-letting the public access it on a server without ever releasing its
-source code to the public.
-
-  The GNU Affero General Public License is designed specifically to
-ensure that, in such cases, the modified source code becomes available
-to the community.  It requires the operator of a network server to
-provide the source code of the modified version running there to the
-users of that server.  Therefore, public use of a modified version, on
-a publicly accessible server, gives the public access to the source
-code of the modified version.
-
-  An older license, called the Affero General Public License and
-published by Affero, was designed to accomplish similar goals.  This is
-a different license, not a version of the Affero GPL, but Affero has
-released a new version of the Affero GPL which permits relicensing under
-this license.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.
-
-                       TERMS AND CONDITIONS
-
-  0. Definitions.
-
-  "This License" refers to version 3 of the GNU Affero General Public License.
-
-  "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
-  "The Program" refers to any copyrightable work licensed under this
-License.  Each licensee is addressed as "you".  "Licensees" and
-"recipients" may be individuals or organizations.
-
-  To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy.  The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
-  A "covered work" means either the unmodified Program or a work based
-on the Program.
-
-  To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy.  Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
-  To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies.  Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
-  An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License.  If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
-  1. Source Code.
-
-  The "source code" for a work means the preferred form of the work
-for making modifications to it.  "Object code" means any non-source
-form of a work.
-
-  A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
-  The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form.  A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
-  The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities.  However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work.  For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
-  The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
-  The Corresponding Source for a work in source code form is that
-same work.
-
-  2. Basic Permissions.
-
-  All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met.  This License explicitly affirms your unlimited
-permission to run the unmodified Program.  The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work.  This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
-  You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force.  You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright.  Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
-  Conveying under any other circumstances is permitted solely under
-the conditions stated below.  Sublicensing is not allowed; section 10
-makes it unnecessary.
-
-  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
-  No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
-  When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
-  4. Conveying Verbatim Copies.
-
-  You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
-  You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
-  5. Conveying Modified Source Versions.
-
-  You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
-    a) The work must carry prominent notices stating that you modified
-    it, and giving a relevant date.
-
-    b) The work must carry prominent notices stating that it is
-    released under this License and any conditions added under section
-    7.  This requirement modifies the requirement in section 4 to
-    "keep intact all notices".
-
-    c) You must license the entire work, as a whole, under this
-    License to anyone who comes into possession of a copy.  This
-    License will therefore apply, along with any applicable section 7
-    additional terms, to the whole of the work, and all its parts,
-    regardless of how they are packaged.  This License gives no
-    permission to license the work in any other way, but it does not
-    invalidate such permission if you have separately received it.
-
-    d) If the work has interactive user interfaces, each must display
-    Appropriate Legal Notices; however, if the Program has interactive
-    interfaces that do not display Appropriate Legal Notices, your
-    work need not make them do so.
-
-  A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit.  Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
-  6. Conveying Non-Source Forms.
-
-  You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
-    a) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by the
-    Corresponding Source fixed on a durable physical medium
-    customarily used for software interchange.
-
-    b) Convey the object code in, or embodied in, a physical product
-    (including a physical distribution medium), accompanied by a
-    written offer, valid for at least three years and valid for as
-    long as you offer spare parts or customer support for that product
-    model, to give anyone who possesses the object code either (1) a
-    copy of the Corresponding Source for all the software in the
-    product that is covered by this License, on a durable physical
-    medium customarily used for software interchange, for a price no
-    more than your reasonable cost of physically performing this
-    conveying of source, or (2) access to copy the
-    Corresponding Source from a network server at no charge.
-
-    c) Convey individual copies of the object code with a copy of the
-    written offer to provide the Corresponding Source.  This
-    alternative is allowed only occasionally and noncommercially, and
-    only if you received the object code with such an offer, in accord
-    with subsection 6b.
-
-    d) Convey the object code by offering access from a designated
-    place (gratis or for a charge), and offer equivalent access to the
-    Corresponding Source in the same way through the same place at no
-    further charge.  You need not require recipients to copy the
-    Corresponding Source along with the object code.  If the place to
-    copy the object code is a network server, the Corresponding Source
-    may be on a different server (operated by you or a third party)
-    that supports equivalent copying facilities, provided you maintain
-    clear directions next to the object code saying where to find the
-    Corresponding Source.  Regardless of what server hosts the
-    Corresponding Source, you remain obligated to ensure that it is
-    available for as long as needed to satisfy these requirements.
-
-    e) Convey the object code using peer-to-peer transmission, provided
-    you inform other peers where the object code and Corresponding
-    Source of the work are being offered to the general public at no
-    charge under subsection 6d.
-
-  A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
-  A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling.  In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage.  For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product.  A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
-  "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source.  The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
-  If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information.  But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
-  The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed.  Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
-  Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
-  7. Additional Terms.
-
-  "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law.  If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
-  When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it.  (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.)  You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
-  Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
-    a) Disclaiming warranty or limiting liability differently from the
-    terms of sections 15 and 16 of this License; or
-
-    b) Requiring preservation of specified reasonable legal notices or
-    author attributions in that material or in the Appropriate Legal
-    Notices displayed by works containing it; or
-
-    c) Prohibiting misrepresentation of the origin of that material, or
-    requiring that modified versions of such material be marked in
-    reasonable ways as different from the original version; or
-
-    d) Limiting the use for publicity purposes of names of licensors or
-    authors of the material; or
-
-    e) Declining to grant rights under trademark law for use of some
-    trade names, trademarks, or service marks; or
-
-    f) Requiring indemnification of licensors and authors of that
-    material by anyone who conveys the material (or modified versions of
-    it) with contractual assumptions of liability to the recipient, for
-    any liability that these contractual assumptions directly impose on
-    those licensors and authors.
-
-  All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10.  If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term.  If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
-  If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
-  Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
-  8. Termination.
-
-  You may not propagate or modify a covered work except as expressly
-provided under this License.  Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
-  However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
-  Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
-  Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License.  If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
-  9. Acceptance Not Required for Having Copies.
-
-  You are not required to accept this License in order to receive or
-run a copy of the Program.  Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance.  However,
-nothing other than this License grants you permission to propagate or
-modify any covered work.  These actions infringe copyright if you do
-not accept this License.  Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
-  10. Automatic Licensing of Downstream Recipients.
-
-  Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License.  You are not responsible
-for enforcing compliance by third parties with this License.
-
-  An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations.  If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
-  You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License.  For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
-  11. Patents.
-
-  A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based.  The
-work thus licensed is called the contributor's "contributor version".
-
-  A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version.  For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
-  Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
-  In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement).  To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
-  If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients.  "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
-  If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
-  A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License.  You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
-  Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
-  12. No Surrender of Others' Freedom.
-
-  If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all.  For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
-  13. Remote Network Interaction; Use with the GNU General Public License.
-
-  Notwithstanding any other provision of this License, if you modify the
-Program, your modified version must prominently offer all users
-interacting with it remotely through a computer network (if your version
-supports such interaction) an opportunity to receive the Corresponding
-Source of your version by providing access to the Corresponding Source
-from a network server at no charge, through some standard or customary
-means of facilitating copying of software.  This Corresponding Source
-shall include the Corresponding Source for any work covered by version 3
-of the GNU General Public License that is incorporated pursuant to the
-following paragraph.
-
-  Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU General Public License into a single
-combined work, and to convey the resulting work.  The terms of this
-License will continue to apply to the part which is the covered work,
-but the work with which it is combined will remain governed by version
-3 of the GNU General Public License.
-
-  14. Revised Versions of this License.
-
-  The Free Software Foundation may publish revised and/or new versions of
-the GNU Affero General Public License from time to time.  Such new versions
-will be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-  Each version is given a distinguishing version number.  If the
-Program specifies that a certain numbered version of the GNU Affero General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation.  If the Program does not specify a version number of the
-GNU Affero General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
-  If the Program specifies that a proxy can decide which future
-versions of the GNU Affero General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
-  Later license versions may give you additional or different
-permissions.  However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
-  15. Disclaimer of Warranty.
-
-  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. Limitation of Liability.
-
-  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
-  17. Interpretation of Sections 15 and 16.
-
-  If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
-                     END OF TERMS AND CONDITIONS
-
-            How to Apply These Terms to Your New Programs
-
-  If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
-  To do so, attach the following notices to the program.  It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the program's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This program is free software: you can redistribute it and/or modify
-    it under the terms of the GNU Affero General Public License as published by
-    the Free Software Foundation, either version 3 of the License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-    GNU Affero General Public License for more details.
-
-    You should have received a copy of the GNU Affero General Public License
-    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
-  If your software can interact with users remotely through a computer
-network, you should also make sure that it provides a way for users to
-get its source.  For example, if your program is a web application, its
-interface could display a "Source" link that leads users to an archive
-of the code.  There are many ways you could offer source, and different
-solutions will be better for different programs; see section 13 for the
-specific requirements.
-
-  You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU AGPL, see
-<http://www.gnu.org/licenses/>.
diff --git a/services/nodemanager/arvados-node-manager.service b/services/nodemanager/arvados-node-manager.service
deleted file mode 100644 (file)
index 38c525b..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-[Unit]
-Description=Arvados Node Manager Daemon
-Documentation=https://doc.arvados.org/
-After=network.target
-AssertPathExists=/etc/arvados-node-manager/config.ini
-
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
-# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
-StartLimitIntervalSec=0
-
-[Service]
-EnvironmentFile=-/etc/default/arvados-node-manager
-LimitDATA=3145728K
-LimitRSS=3145728K
-LimitMEMLOCK=3145728K
-LimitNOFILE=10240
-Type=simple
-ExecStart=/usr/bin/env sh -c '/usr/bin/arvados-node-manager --foreground --config /etc/arvados-node-manager/config.ini 2>&1 | cat'
-Restart=always
-RestartSec=1
-
-# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
-StartLimitInterval=0
-
-[Install]
-WantedBy=multi-user.target
diff --git a/services/nodemanager/arvados_version.py b/services/nodemanager/arvados_version.py
deleted file mode 100644 (file)
index 0c65369..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-import subprocess
-import time
-import os
-import re
-
-SETUP_DIR = os.path.dirname(os.path.abspath(__file__))
-
-def choose_version_from():
-    sdk_ts = subprocess.check_output(
-        ['git', 'log', '--first-parent', '--max-count=1',
-         '--format=format:%ct', os.path.join(SETUP_DIR, "../../sdk/python")]).strip()
-    cwl_ts = subprocess.check_output(
-        ['git', 'log', '--first-parent', '--max-count=1',
-         '--format=format:%ct', SETUP_DIR]).strip()
-    if int(sdk_ts) > int(cwl_ts):
-        getver = os.path.join(SETUP_DIR, "../../sdk/python")
-    else:
-        getver = SETUP_DIR
-    return getver
-
-def git_version_at_commit():
-    curdir = choose_version_from()
-    myhash = subprocess.check_output(['git', 'log', '-n1', '--first-parent',
-                                       '--format=%H', curdir]).strip()
-    myversion = subprocess.check_output([curdir+'/../../build/version-at-commit.sh', myhash]).strip().decode()
-    return myversion
-
-def save_version(setup_dir, module, v):
-    with open(os.path.join(setup_dir, module, "_version.py"), 'wt') as fp:
-        return fp.write("__version__ = '%s'\n" % v)
-
-def read_version(setup_dir, module):
-    with open(os.path.join(setup_dir, module, "_version.py"), 'rt') as fp:
-        return re.match("__version__ = '(.*)'$", fp.read()).groups()[0]
-
-def get_version(setup_dir, module):
-    env_version = os.environ.get("ARVADOS_BUILDING_VERSION")
-
-    if env_version:
-        save_version(setup_dir, module, env_version)
-    else:
-        try:
-            save_version(setup_dir, module, git_version_at_commit())
-        except (subprocess.CalledProcessError, OSError):
-            pass
-
-    return read_version(setup_dir, module)
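
A minimal usage sketch (hypothetical, not part of the deleted file) of how a
setup.py in this tree consumed the helper above; the module name "arvnodeman"
is an assumption:

    import os
    import arvados_version

    SETUP_DIR = os.path.dirname(os.path.abspath(__file__))
    # get_version() writes arvnodeman/_version.py as a side effect (from
    # ARVADOS_BUILDING_VERSION if set, otherwise from git history), then
    # reads the stamped value back.
    version = arvados_version.get_version(SETUP_DIR, "arvnodeman")
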
diff --git a/services/nodemanager/arvnodeman/__init__.py b/services/nodemanager/arvnodeman/__init__.py
deleted file mode 100644 (file)
index 3f94807..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import _strptime  # See <http://bugs.python.org/issue7980#msg221094>.
-import logging
-
-logger = logging.getLogger('arvnodeman')
-logger.addHandler(logging.NullHandler())
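
The package installs only a NullHandler, so importing it stays silent; an
embedding application enables output by attaching its own handler. A hedged
sketch of that application-side setup (the handler and format choices are
assumptions):

    import logging

    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('%(asctime)s %(name)s %(levelname)s %(message)s'))
    logger = logging.getLogger('arvnodeman')
    logger.addHandler(handler)   # overrides the package's silent default
    logger.setLevel(logging.INFO)
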
diff --git a/services/nodemanager/arvnodeman/baseactor.py b/services/nodemanager/arvnodeman/baseactor.py
deleted file mode 100644 (file)
index bdfe5d4..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import errno
-import logging
-import os
-import signal
-import time
-import threading
-import traceback
-
-import pykka
-
-from .status import tracker
-
-class _TellCallableProxy(object):
-    """Internal helper class for proxying callables."""
-
-    def __init__(self, ref, attr_path):
-        self.actor_ref = ref
-        self._attr_path = attr_path
-
-    def __call__(self, *args, **kwargs):
-        message = {
-            'command': 'pykka_call',
-            'attr_path': self._attr_path,
-            'args': args,
-            'kwargs': kwargs,
-        }
-        self.actor_ref.tell(message)
-
-
-class TellActorProxy(pykka.ActorProxy):
-    """ActorProxy in which all calls are implemented as using tell().
-
-    The standard pykka.ActorProxy always uses ask() and returns a Future.  If
-    the target method raises an exception, it is placed in the Future object
-    and re-raised when get() is called on the Future.  Unfortunately, most
-    messaging in Node Manager is asynchronous and the caller does not store the
-    Future object returned by the call to ActorProxy.  As a result, exceptions
-    resulting from these calls end up in limbo, neither reported in the logs
-    nor handled by on_failure().
-
-    The TellActorProxy uses tell() instead of ask() and does not return a
-    Future object.  As a result, if the target method raises an exception, it
-    will be logged and on_failure() will be called as intended.
-
-    """
-
-    def __repr__(self):
-        return '<ActorProxy for %s, attr_path=%s>' % (
-            self.actor_ref, self._attr_path)
-
-    def __getattr__(self, name):
-        """Get a callable from the actor."""
-        attr_path = self._attr_path + (name,)
-        if attr_path not in self._known_attrs:
-            self._known_attrs = self._get_attributes()
-        attr_info = self._known_attrs.get(attr_path)
-        if attr_info is None:
-            raise AttributeError('%s has no attribute "%s"' % (self, name))
-        if attr_info['callable']:
-            if attr_path not in self._callable_proxies:
-                self._callable_proxies[attr_path] = _TellCallableProxy(
-                    self.actor_ref, attr_path)
-            return self._callable_proxies[attr_path]
-        else:
-            raise AttributeError('attribute "%s" is not a callable on %s' % (name, self))
-
-class TellableActorRef(pykka.ActorRef):
-    """ActorRef adding the tell_proxy() method to get TellActorProxy."""
-
-    def tell_proxy(self):
-        return TellActorProxy(self)
-
-class BaseNodeManagerActor(pykka.ThreadingActor):
-    """Base class for actors in node manager, redefining actor_ref as a
-    TellableActorRef and providing a default on_failure handler.
-    """
-
-    def __init__(self, *args, **kwargs):
-        super(pykka.ThreadingActor, self).__init__(*args, **kwargs)
-        self.actor_ref = TellableActorRef(self)
-        self._killfunc = kwargs.get("killfunc", os.kill)
-
-    def on_failure(self, exception_type, exception_value, tb):
-        lg = getattr(self, "_logger", logging)
-        if (exception_type in (threading.ThreadError, MemoryError) or
-            exception_type is OSError and exception_value.errno == errno.ENOMEM):
-            lg.critical("Unhandled exception is a fatal error, killing Node Manager")
-            self._killfunc(os.getpid(), signal.SIGKILL)
-        tracker.counter_add('actor_exceptions')
-
-    def ping(self):
-        return True
-
-    def get_thread(self):
-        return threading.current_thread()
-
-class WatchdogActor(pykka.ThreadingActor):
-    def __init__(self, timeout, *args, **kwargs):
-        super(pykka.ThreadingActor, self).__init__(*args, **kwargs)
-        self.timeout = timeout
-        self.actors = [a.proxy() for a in args]
-        self.actor_ref = TellableActorRef(self)
-        self._later = self.actor_ref.tell_proxy()
-        self._killfunc = kwargs.get("killfunc", os.kill)
-
-    def kill_self(self, e, act):
-        lg = getattr(self, "_logger", logging)
-        lg.critical("Watchdog exception", exc_info=e)
-        lg.critical("Actor %s watchdog ping time out, killing Node Manager", act)
-        self._killfunc(os.getpid(), signal.SIGKILL)
-
-    def on_start(self):
-        self._later.run()
-
-    def run(self):
-        a = None
-        try:
-            for a in self.actors:
-                a.ping().get(self.timeout)
-            time.sleep(20)
-            self._later.run()
-        except Exception as e:
-            self.kill_self(e, a)
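
The docstrings above describe the tell()-based proxy pattern; here is a
minimal sketch of the difference it makes, assuming pykka is available and
using a hypothetical ExampleActor:

    class ExampleActor(BaseNodeManagerActor):
        def do_work(self):
            raise RuntimeError("boom")

    ref = ExampleActor.start()
    proxy = ref.tell_proxy()   # TellActorProxy, not the default ask() proxy
    # Fire-and-forget: no Future is returned, so the exception is logged
    # and on_failure() runs instead of vanishing in an unread Future.
    proxy.do_work()
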
diff --git a/services/nodemanager/arvnodeman/clientactor.py b/services/nodemanager/arvnodeman/clientactor.py
deleted file mode 100644 (file)
index afc4f1c..0000000
+++ /dev/null
@@ -1,116 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import logging
-import time
-
-import pykka
-
-from .config import actor_class
-
-def _notify_subscribers(response, subscribers):
-    """Send the response to all the subscriber methods.
-
-    If any of the subscriber actors have stopped, remove them from the
-    subscriber set.
-    """
-    dead_subscribers = set()
-    for subscriber in subscribers:
-        try:
-            subscriber(response)
-        except pykka.ActorDeadError:
-            dead_subscribers.add(subscriber)
-    subscribers.difference_update(dead_subscribers)
-
-class RemotePollLoopActor(actor_class):
-    """Abstract actor class to regularly poll a remote service.
-
-    This actor sends regular requests to a remote service, and sends each
-    response to subscribers.  It takes care of error handling, and retrying
-    requests with exponential backoff.
-
-    To use this actor, define the _send_request method.  If you also
-    define an _item_key method, this class will support subscribing to
-    a specific item by key in responses.
-    """
-    def __init__(self, client, timer_actor, poll_wait=60, max_poll_wait=180):
-        super(RemotePollLoopActor, self).__init__()
-        self._client = client
-        self._timer = timer_actor
-        self._later = self.actor_ref.tell_proxy()
-        self._polling_started = False
-        self.min_poll_wait = poll_wait
-        self.max_poll_wait = max_poll_wait
-        self.poll_wait = self.min_poll_wait
-        self.all_subscribers = set()
-        self.key_subscribers = {}
-        if hasattr(self, '_item_key'):
-            self.subscribe_to = self._subscribe_to
-
-    def on_start(self):
-        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[9:]))
-
-    def _start_polling(self):
-        if not self._polling_started:
-            self._polling_started = True
-            self._later.poll()
-
-    def subscribe(self, subscriber):
-        self.all_subscribers.add(subscriber)
-        self._logger.debug("%s subscribed to all events", subscriber.actor_ref.actor_urn)
-        self._start_polling()
-
-    # __init__ exposes this method to the proxy if the subclass defines
-    # _item_key.
-    def _subscribe_to(self, key, subscriber):
-        self.key_subscribers.setdefault(key, set()).add(subscriber)
-        self._logger.debug("%s subscribed to events for '%s'", subscriber.actor_ref.actor_urn, key)
-        self._start_polling()
-
-    def _send_request(self):
-        raise NotImplementedError("subclasses must implement request method")
-
-    def _got_response(self, response):
-        self.poll_wait = self.min_poll_wait
-        _notify_subscribers(response, self.all_subscribers)
-        if hasattr(self, '_item_key'):
-            items = {self._item_key(x): x for x in response}
-            for key, subscribers in self.key_subscribers.iteritems():
-                _notify_subscribers(items.get(key), subscribers)
-
-    def _got_error(self, error):
-        self.poll_wait = min(self.poll_wait * 2, self.max_poll_wait)
-        return "got error: {} - will try again in {} seconds".format(
-            error, self.poll_wait)
-
-    def is_common_error(self, exception):
-        return False
-
-    def poll(self, scheduled_start=None):
-        self._logger.debug("sending request")
-        start_time = time.time()
-        if scheduled_start is None:
-            scheduled_start = start_time
-        try:
-            response = self._send_request()
-        except Exception as error:
-            errmsg = self._got_error(error)
-            if self.is_common_error(error):
-                self._logger.warning(errmsg)
-            else:
-                self._logger.exception(errmsg)
-            next_poll = start_time + self.poll_wait
-        else:
-            self._got_response(response)
-            next_poll = scheduled_start + self.poll_wait
-            self._logger.info("got response with %d items in %s seconds, next poll at %s",
-                              len(response), (time.time() - scheduled_start),
-                              time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_poll)))
-        end_time = time.time()
-        if next_poll < end_time:  # We've drifted too much; start fresh.
-            next_poll = end_time + self.poll_wait
-        self._timer.schedule(next_poll, self._later.poll, next_poll)
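
Per the docstring, subclasses of RemotePollLoopActor implement _send_request()
and may implement _item_key() to enable per-key subscriptions. A hypothetical
subclass sketch (the client object and its API are assumptions):

    class NodeListPoller(RemotePollLoopActor):
        def _send_request(self):
            # self._client is whatever client object was passed to __init__.
            return self._client.nodes().list().execute()['items']

        def _item_key(self, node):
            # Defining this exposes subscribe_to(key, subscriber) on proxies.
            return node['uuid']
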
diff --git a/services/nodemanager/arvnodeman/computenode/__init__.py b/services/nodemanager/arvnodeman/computenode/__init__.py
deleted file mode 100644 (file)
index b124c66..0000000
+++ /dev/null
@@ -1,201 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import calendar
-import functools
-import itertools
-import re
-import time
-
-from ..config import CLOUD_ERRORS
-from ..status import tracker
-from libcloud.common.exceptions import BaseHTTPError, RateLimitReachedError
-
-ARVADOS_TIMEFMT = '%Y-%m-%dT%H:%M:%SZ'
-ARVADOS_TIMESUBSEC_RE = re.compile(r'(\.\d+)Z$')
-
-def arvados_node_fqdn(arvados_node, default_hostname='dynamic.compute'):
-    hostname = arvados_node.get('hostname') or default_hostname
-    return '{}.{}'.format(hostname, arvados_node['domain'])
-
-def arvados_node_mtime(node):
-    return arvados_timestamp(node['modified_at'])
-
-def arvados_timestamp(timestr):
-    subsec_match = ARVADOS_TIMESUBSEC_RE.search(timestr)
-    if subsec_match is None:
-        subsecs = .0
-    else:
-        subsecs = float(subsec_match.group(1))
-        timestr = timestr[:subsec_match.start()] + 'Z'
-    return calendar.timegm(time.strptime(timestr + 'UTC',
-                                         ARVADOS_TIMEFMT + '%Z')) + subsecs
-
-def timestamp_fresh(timestamp, fresh_time):
-    return (time.time() - timestamp) < fresh_time
-
-def arvados_node_missing(arvados_node, fresh_time):
-    """Indicate if cloud node corresponding to the arvados
-    node is "missing".
-
-    If True, this means the node has not pinged the API server within the timeout
-    period.  If False, the ping is up to date.  If the node has never pinged,
-    returns None.
-    """
-    if arvados_node["last_ping_at"] is None:
-        return None
-    else:
-        return not timestamp_fresh(arvados_timestamp(arvados_node["last_ping_at"]), fresh_time)
-
-class RetryMixin(object):
-    """Retry decorator for an method that makes remote requests.
-
-    Use this function to decorate method, and pass in a tuple of exceptions to
-    catch.  If the original method raises a known cloud driver error, or any of
-    the given exception types, this decorator will either go into a
-    sleep-and-retry loop with exponential backoff either by sleeping (if
-    self._timer is None) or by scheduling retries of the method (if self._timer
-    is a timer actor.)
-
-    """
-    def __init__(self, retry_wait, max_retry_wait, logger, cloud, timer=None):
-        self.min_retry_wait = max(1, retry_wait)
-        self.max_retry_wait = max(self.min_retry_wait, max_retry_wait)
-        self.retry_wait = retry_wait
-        self._logger = logger
-        self._cloud = cloud
-        self._timer = timer
-
-    @staticmethod
-    def _retry(errors=()):
-        def decorator(orig_func):
-            @functools.wraps(orig_func)
-            def retry_wrapper(self, *args, **kwargs):
-                while True:
-                    should_retry = False
-                    try:
-                        ret = orig_func(self, *args, **kwargs)
-                    except RateLimitReachedError as error:
-                        # If retry-after is zero, continue with exponential
-                        # backoff.
-                        if error.retry_after != 0:
-                            self.retry_wait = error.retry_after
-                        should_retry = True
-                    except BaseHTTPError as error:
-                        if error.headers and error.headers.get("retry-after"):
-                            try:
-                                retry_after = int(error.headers["retry-after"])
-                                # If retry-after is zero, continue with
-                                # exponential backoff.
-                                if retry_after != 0:
-                                    self.retry_wait = retry_after
-                                should_retry = True
-                            except ValueError:
-                                self._logger.warning(
-                                    "Unrecognizable Retry-After header: %r",
-                                    error.headers["retry-after"],
-                                    exc_info=error)
-                        if error.code == 429 or error.code >= 500:
-                            should_retry = True
-                    except CLOUD_ERRORS as error:
-                        tracker.counter_add('cloud_errors')
-                        should_retry = True
-                    except errors as error:
-                        should_retry = True
-                    except Exception as error:
-                        # As a libcloud workaround for drivers that don't use
-                        # typed exceptions, consider bare Exception() objects
-                        # retryable.
-                        if type(error) is Exception:
-                            tracker.counter_add('cloud_errors')
-                            should_retry = True
-                    else:
-                        # No exception
-                        self.retry_wait = self.min_retry_wait
-                        return ret
-
-                    # Only got here if an exception was caught.  Now determine what to do about it.
-                    if not should_retry:
-                        self.retry_wait = self.min_retry_wait
-                        self._logger.warning(
-                            "Re-raising error (no retry): %s",
-                            error, exc_info=error)
-                        raise
-
-                    # Retry wait out of bounds?
-                    if self.retry_wait < self.min_retry_wait:
-                        self.retry_wait = self.min_retry_wait
-                    elif self.retry_wait > self.max_retry_wait:
-                        self.retry_wait = self.max_retry_wait
-
-                    self._logger.warning(
-                        "Client error: %s - %s %s seconds",
-                        error,
-                        "scheduling retry in" if self._timer else "sleeping",
-                        self.retry_wait,
-                        exc_info=error)
-
-                    if self._timer:
-                        start_time = time.time()
-                        # reschedule to be called again
-                        self._timer.schedule(start_time + self.retry_wait,
-                                             getattr(self._later,
-                                                     orig_func.__name__),
-                                             *args, **kwargs)
-                    else:
-                        # sleep on it.
-                        time.sleep(self.retry_wait)
-
-                    self.retry_wait = min(self.retry_wait * 2,
-                                          self.max_retry_wait)
-                    if self._timer:
-                        # expect to be called again by timer so don't loop
-                        return
-
-            return retry_wrapper
-        return decorator
-
-class ShutdownTimer(object):
-    """Keep track of a cloud node's shutdown windows.
-
-    Instantiate this class with the timestamp of when a cloud node started
-    and an alternating list of durations (in minutes) during which the node
-    must not, and then may, be shut down.  The class will tell you when a
-    shutdown window is open, and when the next open window will start.
-    """
-    def __init__(self, start_time, shutdown_windows):
-        # The implementation is easiest if we have an even number of windows,
-        # because then windows always alternate between open and closed.
-        # Rig that up: calculate the first shutdown window based on what's
-        # passed in.  Then, if we were given an odd number of windows, merge
-        # that first window into the last one, since they both represent
-        # the closed state.
-        first_window = shutdown_windows[0]
-        shutdown_windows = list(shutdown_windows[1:])
-        self._next_opening = start_time + (60 * first_window)
-        if len(shutdown_windows) % 2:
-            shutdown_windows.append(first_window)
-        else:
-            shutdown_windows[-1] += first_window
-        self.shutdown_windows = itertools.cycle([60 * n
-                                                 for n in shutdown_windows])
-        self._open_start = self._next_opening
-        self._open_for = next(self.shutdown_windows)
-
-    def _advance_opening(self):
-        while self._next_opening < time.time():
-            self._open_start = self._next_opening
-            self._next_opening += self._open_for + next(self.shutdown_windows)
-            self._open_for = next(self.shutdown_windows)
-
-    def next_opening(self):
-        self._advance_opening()
-        return self._next_opening
-
-    def window_open(self):
-        self._advance_opening()
-        return 0 < (time.time() - self._open_start) < self._open_for
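
ShutdownTimer's window arithmetic is easiest to see with concrete numbers; a
hypothetical usage sketch:

    import time

    # Node booted now; must stay up 54 minutes, then may be shut down for 5
    # minutes, then must stay up 1 minute, and so on.  (Internally the odd
    # leading closed window is folded into the cycle, yielding a 5-minute
    # opening every 60 minutes.)
    timer = ShutdownTimer(time.time(), [54, 5, 1])
    timer.window_open()    # False during the first 54 minutes
    timer.next_opening()   # epoch timestamp 54 minutes after start
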
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py b/services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
deleted file mode 100644 (file)
index 77c515d..0000000
+++ /dev/null
@@ -1,536 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import functools
-import logging
-import time
-import re
-
-import libcloud.common.types as cloud_types
-from libcloud.common.exceptions import BaseHTTPError
-
-import pykka
-
-from .. import \
-    arvados_node_fqdn, arvados_node_mtime, arvados_timestamp, timestamp_fresh, \
-    arvados_node_missing, RetryMixin
-from ...clientactor import _notify_subscribers
-from ... import config
-from ... import status
-from .transitions import transitions
-
-QuotaExceeded = "QuotaExceeded"
-
-class ComputeNodeStateChangeBase(config.actor_class, RetryMixin):
-    """Base class for actors that change a compute node's state.
-
-    This base class takes care of retrying changes and notifying
-    subscribers when the change is finished.
-    """
-    def __init__(self, cloud_client, arvados_client, timer_actor,
-                 retry_wait, max_retry_wait):
-        super(ComputeNodeStateChangeBase, self).__init__()
-        RetryMixin.__init__(self, retry_wait, max_retry_wait,
-                            None, cloud_client, timer_actor)
-        self._later = self.actor_ref.tell_proxy()
-        self._arvados = arvados_client
-        self.subscribers = set()
-
-    def _set_logger(self):
-        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
-
-    def on_start(self):
-        self._set_logger()
-
-    def _finished(self):
-        if self.subscribers is None:
-            raise Exception("Actor tried to finish twice")
-        _notify_subscribers(self.actor_ref.proxy(), self.subscribers)
-        self.subscribers = None
-        self._logger.info("finished")
-
-    def subscribe(self, subscriber):
-        if self.subscribers is None:
-            try:
-                subscriber(self.actor_ref.proxy())
-            except pykka.ActorDeadError:
-                pass
-        else:
-            self.subscribers.add(subscriber)
-
-    def _clean_arvados_node(self, arvados_node, explanation):
-        return self._arvados.nodes().update(
-            uuid=arvados_node['uuid'],
-            body={'hostname': None,
-                  'ip_address': None,
-                  'slot_number': None,
-                  'first_ping_at': None,
-                  'last_ping_at': None,
-                  'properties': {},
-                  'info': {'ec2_instance_id': None,
-                           'last_action': explanation}},
-            ).execute()
-
-    @staticmethod
-    def _finish_on_exception(orig_func):
-        @functools.wraps(orig_func)
-        def finish_wrapper(self, *args, **kwargs):
-            try:
-                return orig_func(self, *args, **kwargs)
-            except Exception as error:
-                self._logger.error("Actor error %s", error)
-                self._finished()
-        return finish_wrapper
-
-
-class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
-    """Actor to create and set up a cloud compute node.
-
-    This actor prepares an Arvados node record for a new compute node
-    (either creating one or cleaning one passed in), then boots the
-    actual compute node.  It notifies subscribers when the cloud node
-    is successfully created (the last step in the process for Node
-    Manager to handle).
-    """
-    def __init__(self, timer_actor, arvados_client, cloud_client,
-                 cloud_size, arvados_node=None,
-                 retry_wait=1, max_retry_wait=180):
-        super(ComputeNodeSetupActor, self).__init__(
-            cloud_client, arvados_client, timer_actor,
-            retry_wait, max_retry_wait)
-        self.cloud_size = cloud_size
-        self.arvados_node = None
-        self.cloud_node = None
-        self.error = None
-        if arvados_node is None:
-            self._later.create_arvados_node()
-        else:
-            self._later.prepare_arvados_node(arvados_node)
-
-    @ComputeNodeStateChangeBase._finish_on_exception
-    @RetryMixin._retry(config.ARVADOS_ERRORS)
-    def create_arvados_node(self):
-        self.arvados_node = self._arvados.nodes().create(
-            body={}, assign_slot=True).execute()
-        self._later.create_cloud_node()
-
-    @ComputeNodeStateChangeBase._finish_on_exception
-    @RetryMixin._retry(config.ARVADOS_ERRORS)
-    def prepare_arvados_node(self, node):
-        self._clean_arvados_node(node, "Prepared by Node Manager")
-        self.arvados_node = self._arvados.nodes().update(
-            uuid=node['uuid'], body={}, assign_slot=True).execute()
-        self._later.create_cloud_node()
-
-    @ComputeNodeStateChangeBase._finish_on_exception
-    @RetryMixin._retry()
-    def create_cloud_node(self):
-        self._logger.info("Sending create_node request for node size %s.",
-                          self.cloud_size.id)
-        try:
-            self.cloud_node = self._cloud.create_node(self.cloud_size,
-                                                      self.arvados_node)
-        except BaseHTTPError as e:
-            if e.code == 429 or "RequestLimitExceeded" in e.message:
-                # Don't consider API rate limits to be quota errors.
-                # re-raise so the Retry logic applies.
-                raise
-
-            # The set of possible error codes / messages isn't documented for
-            # all clouds, so use a keyword heuristic to determine if the
-            # failure is likely due to a quota.
-            if re.search(r'(exceed|quota|limit)', e.message, re.I):
-                self.error = QuotaExceeded
-                self._logger.warning("Quota exceeded: %s", e)
-                self._finished()
-                return
-            else:
-                # Something else happened, re-raise so the Retry logic applies.
-                raise
-
-        # The information included in the node size object we get from libcloud
-        # is inconsistent between cloud drivers.  Replace libcloud NodeSize
-        # object with compatible CloudSizeWrapper object which merges the size
-        # info reported from the cloud with size information from the
-        # configuration file.
-        self.cloud_node.size = self.cloud_size
-
-        self._logger.info("Cloud node %s created.", self.cloud_node.id)
-        self._later.update_arvados_node_properties()
-
-    @ComputeNodeStateChangeBase._finish_on_exception
-    @RetryMixin._retry(config.ARVADOS_ERRORS)
-    def update_arvados_node_properties(self):
-        """Tell Arvados some details about the cloud node.
-
-        Currently we only include size/price from our request, which
-        we already knew before create_cloud_node(), but doing it here
-        gives us an opportunity to provide more detail from
-        self.cloud_node, too.
-        """
-        self.arvados_node['properties']['cloud_node'] = {
-            # Note this 'size' is the node size we asked the cloud
-            # driver to create -- not necessarily equal to the size
-            # reported by the cloud driver for the node that was
-            # created.
-            'size': self.cloud_size.id,
-            'price': self.cloud_size.price,
-        }
-        self.arvados_node = self._arvados.nodes().update(
-            uuid=self.arvados_node['uuid'],
-            body={'properties': self.arvados_node['properties']},
-        ).execute()
-        self._logger.info("%s updated properties.", self.arvados_node['uuid'])
-        self._later.post_create()
-
-    @RetryMixin._retry()
-    def post_create(self):
-        self._cloud.post_create_node(self.cloud_node)
-        self._logger.info("%s post-create work done.", self.cloud_node.id)
-        self._finished()
-
-    def stop_if_no_cloud_node(self):
-        if self.cloud_node is not None:
-            return False
-        self.stop()
-        return True
-
-
-class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
-    """Actor to shut down a compute node.
-
-    This actor simply destroys a cloud node, retrying as needed.
-    """
-    # Reasons for a shutdown to be cancelled.
-    WINDOW_CLOSED = "shutdown window closed"
-    DESTROY_FAILED = "destroy_node failed"
-
-    def __init__(self, timer_actor, cloud_client, arvados_client, node_monitor,
-                 cancellable=True, retry_wait=1, max_retry_wait=180):
-        # If a ShutdownActor is cancellable, it will ask the
-        # ComputeNodeMonitorActor if it's still eligible before taking each
-        # action, and stop the shutdown process if the node is no longer
-        # eligible.  Normal shutdowns based on job demand should be
-        # cancellable; shutdowns based on node misbehavior should not.
-        super(ComputeNodeShutdownActor, self).__init__(
-            cloud_client, arvados_client, timer_actor,
-            retry_wait, max_retry_wait)
-        self._monitor = node_monitor.proxy()
-        self.cloud_node = self._monitor.cloud_node.get()
-        self.cancellable = cancellable
-        self.cancel_reason = None
-        self.success = None
-
-    def _set_logger(self):
-        self._logger = logging.getLogger("%s.%s.%s" % (self.__class__.__name__, self.actor_urn[33:], self.cloud_node.name))
-
-    def on_start(self):
-        super(ComputeNodeShutdownActor, self).on_start()
-        self._later.shutdown_node()
-
-    def _arvados_node(self):
-        return self._monitor.arvados_node.get()
-
-    def _finished(self, success_flag=None):
-        if success_flag is not None:
-            self.success = success_flag
-        return super(ComputeNodeShutdownActor, self)._finished()
-
-    def cancel_shutdown(self, reason, **kwargs):
-        if not self.cancellable:
-            return False
-        if self.cancel_reason is not None:
-            # already cancelled
-            return False
-        self.cancel_reason = reason
-        self._logger.info("Shutdown cancelled: %s.", reason)
-        self._finished(success_flag=False)
-        return True
-
-    def _cancel_on_exception(orig_func):
-        @functools.wraps(orig_func)
-        def finish_wrapper(self, *args, **kwargs):
-            try:
-                return orig_func(self, *args, **kwargs)
-            except Exception as error:
-                self._logger.error("Actor error %s", error)
-                self._logger.debug("", exc_info=True)
-                self._later.cancel_shutdown("Unhandled exception %s" % error, try_resume=False)
-        return finish_wrapper
-
-    @_cancel_on_exception
-    def shutdown_node(self):
-        if self.cancel_reason is not None:
-            # already cancelled
-            return
-        if self.cancellable:
-            self._logger.info("Checking that node is still eligible for shutdown")
-            eligible, reason = self._monitor.shutdown_eligible().get()
-            if not eligible:
-                self.cancel_shutdown("No longer eligible for shut down because %s" % reason,
-                                     try_resume=True)
-                return
-        # If boot failed, count the event
-        if self._monitor.get_state().get() == 'unpaired':
-            status.tracker.counter_add('boot_failures')
-        self._destroy_node()
-
-    def _destroy_node(self):
-        self._logger.info("Starting shutdown")
-        arv_node = self._arvados_node()
-        if self._cloud.destroy_node(self.cloud_node):
-            self.cancellable = False
-            self._logger.info("Shutdown success")
-            if arv_node:
-                self._later.clean_arvados_node(arv_node)
-            else:
-                self._finished(success_flag=True)
-        else:
-            self.cancel_shutdown(self.DESTROY_FAILED, try_resume=False)
-
-    @ComputeNodeStateChangeBase._finish_on_exception
-    @RetryMixin._retry(config.ARVADOS_ERRORS)
-    def clean_arvados_node(self, arvados_node):
-        self._clean_arvados_node(arvados_node, "Shut down by Node Manager")
-        self._finished(success_flag=True)
-
-
-class ComputeNodeUpdateActor(config.actor_class, RetryMixin):
-    """Actor to dispatch one-off cloud management requests.
-
-    This actor receives requests for small cloud updates, and
-    dispatches them to a real driver.  ComputeNodeMonitorActors use
-    this to perform maintenance tasks on themselves.  Having a
-    dedicated actor for this gives us the opportunity to control the
-    flow of requests; e.g., by backing off when errors occur.
-    """
-    def __init__(self, cloud_factory, timer_actor, max_retry_wait=180):
-        super(ComputeNodeUpdateActor, self).__init__()
-        RetryMixin.__init__(self, 1, max_retry_wait,
-                            None, cloud_factory(), timer_actor)
-        self._cloud = cloud_factory()
-        self._later = self.actor_ref.tell_proxy()
-
-    def _set_logger(self):
-        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
-
-    def on_start(self):
-        self._set_logger()
-
-    @RetryMixin._retry()
-    def sync_node(self, cloud_node, arvados_node):
-        if self._cloud.node_fqdn(cloud_node) != arvados_node_fqdn(arvados_node):
-            return self._cloud.sync_node(cloud_node, arvados_node)
-
-
-class ComputeNodeMonitorActor(config.actor_class):
-    """Actor to manage a running compute node.
-
-    This actor gets updates about a compute node's cloud and Arvados records.
-    It uses this information to notify subscribers when the node is eligible
-    for shutdown.
-    """
-    def __init__(self, cloud_node, cloud_node_start_time, shutdown_timer,
-                 timer_actor, update_actor, cloud_client,
-                 arvados_node=None, poll_stale_after=600, node_stale_after=3600,
-                 boot_fail_after=1800, consecutive_idle_count=0
-    ):
-        super(ComputeNodeMonitorActor, self).__init__()
-        self._later = self.actor_ref.tell_proxy()
-        self._shutdowns = shutdown_timer
-        self._timer = timer_actor
-        self._update = update_actor
-        self._cloud = cloud_client
-        self.cloud_node = cloud_node
-        self.cloud_node_start_time = cloud_node_start_time
-        self.poll_stale_after = poll_stale_after
-        self.node_stale_after = node_stale_after
-        self.boot_fail_after = boot_fail_after
-        self.subscribers = set()
-        self.arvados_node = None
-        self.consecutive_idle_count = consecutive_idle_count
-        self.consecutive_idle = 0
-        self._later.update_arvados_node(arvados_node)
-        self.last_shutdown_opening = None
-        self._later.consider_shutdown()
-
-    def _set_logger(self):
-        self._logger = logging.getLogger("%s.%s.%s" % (self.__class__.__name__, self.actor_urn[33:], self.cloud_node.name))
-
-    def on_start(self):
-        self._set_logger()
-        self._timer.schedule(self.cloud_node_start_time + self.boot_fail_after, self._later.consider_shutdown)
-
-    def subscribe(self, subscriber):
-        self.subscribers.add(subscriber)
-
-    def _debug(self, msg, *args):
-        self._logger.debug(msg, *args)
-
-    def get_state(self):
-        """Get node state, one of ['unpaired', 'busy', 'idle', 'down']."""
-
-        # If this node is not associated with an Arvados node, return
-        # 'unpaired' if we're in the boot grace period, and 'down' if not,
-        # so it isn't counted towards usable nodes.
-        if self.arvados_node is None:
-            if timestamp_fresh(self.cloud_node_start_time,
-                               self.boot_fail_after):
-                return 'unpaired'
-            else:
-                return 'down'
-
-        state = self.arvados_node['crunch_worker_state']
-
-        # If state information is not available because it is missing or the
-        # record is stale, return 'down'.
-        if not state or not timestamp_fresh(arvados_node_mtime(self.arvados_node),
-                                            self.node_stale_after):
-            state = 'down'
-
-        # There's a window between when a node pings for the first time and the
-        # value of 'slurm_state' is synchronized by crunch-dispatch.  In this
-        # window, the node will still report as 'down'.  Check that
-        # first_ping_at is truthy and consider the node 'idle' during the
-        # initial boot grace period.
-        if (state == 'down' and
-            self.arvados_node['first_ping_at'] and
-            timestamp_fresh(self.cloud_node_start_time,
-                            self.boot_fail_after) and
-            not self._cloud.broken(self.cloud_node)):
-            state = 'idle'
-
-        # "missing" means last_ping_at is stale, this should be
-        # considered "down"
-        if arvados_node_missing(self.arvados_node, self.node_stale_after):
-            state = 'down'
-
-        # Turns out using 'job_uuid' this way is a bad idea.  The node record
-        # is assigned the job_uuid before the job is locked (which removes it
-        # from the queue), which means the job will be double-counted, both as
-        # an entry in the wishlist and as keeping a node busy.  The end result
-        # is excess nodes being booted.
-        #if state == 'idle' and self.arvados_node['job_uuid']:
-        #    state = 'busy'
-
-        # Update idle node times tracker
-        if state == 'idle':
-            status.tracker.idle_in(self.arvados_node['hostname'])
-        else:
-            status.tracker.idle_out(self.arvados_node['hostname'])
-
-        return state
-
-    def in_state(self, *states):
-        return self.get_state() in states
-
-    def shutdown_eligible(self):
-        """Determine if node is candidate for shut down.
-
-        Returns a tuple of (boolean, string) where the first value is whether
-        the node is candidate for shut down, and the second value is the
-        reason for the decision.
-        """
-
-        # If this node's size is invalid (because it has a stale arvados_node_size
-        # tag), return True so that it's properly shut down.
-        if self.cloud_node.size.id == 'invalid':
-            return (True, "node's size tag '%s' not recognizable" % (self.cloud_node.extra['arvados_node_size'],))
-
-        # Collect states and then consult state transition table whether we
-        # should shut down.  Possible states are:
-        # crunch_worker_state = ['unpaired', 'busy', 'idle', 'down']
-        # window = ["open", "closed"]
-        # boot_grace = ["boot wait", "boot exceeded"]
-        # idle_grace = ["not idle", "idle wait", "idle exceeded"]
-
-        if self.arvados_node and not timestamp_fresh(arvados_node_mtime(self.arvados_node), self.node_stale_after):
-            return (False, "node state is stale")
-
-        crunch_worker_state = self.get_state()
-
-        window = "open" if self._shutdowns.window_open() else "closed"
-
-        if timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after):
-            boot_grace = "boot wait"
-        else:
-            boot_grace = "boot exceeded"
-
-        if crunch_worker_state == "idle":
-            # Must report as "idle" at least "consecutive_idle_count" times
-            if self.consecutive_idle < self.consecutive_idle_count:
-                idle_grace = 'idle wait'
-            else:
-                idle_grace = 'idle exceeded'
-        else:
-            idle_grace = 'not idle'
-
-        node_state = (crunch_worker_state, window, boot_grace, idle_grace)
-        t = transitions[node_state]
-        if t is not None:
-            # yes, shutdown eligible
-            return (True, "node state is %s" % (node_state,))
-        else:
-            # no, return a reason
-            return (False, "node state is %s" % (node_state,))
-
-    def consider_shutdown(self):
-        try:
-            eligible, reason = self.shutdown_eligible()
-            next_opening = self._shutdowns.next_opening()
-            if eligible:
-                self._debug("Suggesting shutdown because %s", reason)
-                _notify_subscribers(self.actor_ref.proxy(), self.subscribers)
-            else:
-                self._debug("Not eligible for shut down because %s", reason)
-
-                if self.last_shutdown_opening != next_opening:
-                    self._debug("Shutdown window closed.  Next at %s.",
-                                time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_opening)))
-                    self._timer.schedule(next_opening, self._later.consider_shutdown)
-                    self.last_shutdown_opening = next_opening
-        except Exception:
-            self._logger.exception("Unexpected exception")
-
-    def offer_arvados_pair(self, arvados_node):
-        first_ping_s = arvados_node.get('first_ping_at')
-        if (self.arvados_node is not None) or (not first_ping_s):
-            return None
-        elif ((arvados_node['info'].get('ec2_instance_id') == self._cloud.node_id(self.cloud_node)) and
-              (arvados_timestamp(first_ping_s) >= self.cloud_node_start_time)):
-            self._later.update_arvados_node(arvados_node)
-            return self.cloud_node.id
-        else:
-            return None
-
-    def update_cloud_node(self, cloud_node):
-        if cloud_node is not None:
-            self.cloud_node = cloud_node
-            self._later.consider_shutdown()
-
-    def update_arvados_node(self, arvados_node):
-        """Called when the latest Arvados node record is retrieved.
-
-        Calls the updater's sync_node() method.
-
-        """
-        # This method is a little unusual in the way it just fires off the
-        # request without checking the result or retrying errors.  That's
-        # because this update happens every time we reload the Arvados node
-        # list: if a previous sync attempt failed, we'll see that the names
-        # are out of sync and just try again.  ComputeNodeUpdateActor has
-        # the logic to throttle those effective retries when there's trouble.
-        if arvados_node is not None:
-            self.arvados_node = arvados_node
-            self._update.sync_node(self.cloud_node, self.arvados_node)
-            if self.arvados_node['crunch_worker_state'] == "idle":
-                self.consecutive_idle += 1
-            else:
-                self.consecutive_idle = 0
-            self._later.consider_shutdown()
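
A hedged wiring sketch of the setup actor's lifecycle described above: start
it, subscribe, and the subscriber fires once the cloud node exists or
self.error is set (e.g. to QuotaExceeded). The collaborator objects
(timer_actor, arvados_client, cloud_client, cloud_size) are assumed to exist:

    def on_setup_done(setup_proxy):
        if setup_proxy.error.get() == QuotaExceeded:
            print("cloud quota exceeded, not retrying")
        else:
            print("cloud node created:", setup_proxy.cloud_node.get().id)

    setup = ComputeNodeSetupActor.start(
        timer_actor, arvados_client, cloud_client, cloud_size).tell_proxy()
    setup.subscribe(on_setup_done)
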
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py b/services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
deleted file mode 100644 (file)
index 5b7785a..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import subprocess32 as subprocess
-import time
-
-from . import ComputeNodeMonitorActor
-from . import ComputeNodeSetupActor as SetupActorBase
-from . import ComputeNodeShutdownActor as ShutdownActorBase
-from . import ComputeNodeUpdateActor as UpdateActorBase
-from .. import RetryMixin
-
-class SlurmMixin(object):
-    SLURM_END_STATES = frozenset(['down\n', 'down*\n',
-                                  'drain\n', 'drain*\n',
-                                  'fail\n', 'fail*\n'])
-    SLURM_DRAIN_STATES = frozenset(['drain\n', 'drng\n'])
-
-    def _update_slurm_node(self, nodename, updates):
-        cmd = ['scontrol', 'update', 'NodeName=' + nodename] + updates
-        try:
-            subprocess.check_output(cmd)
-        except:
-            self._logger.error(
-                "SLURM update %r failed", cmd, exc_info=True)
-
-    def _update_slurm_size_attrs(self, nodename, size):
-        self._update_slurm_node(nodename, [
-            'Weight=%i' % int(size.price * 1000),
-            'Features=instancetype=' + size.id,
-        ])
-
-    def _get_slurm_state(self, nodename):
-        return subprocess.check_output(['sinfo', '--noheader', '-o', '%t', '-n', nodename])
-
-
-class ComputeNodeSetupActor(SlurmMixin, SetupActorBase):
-    def create_cloud_node(self):
-        hostname = self.arvados_node.get("hostname")
-        if hostname:
-            self._update_slurm_size_attrs(hostname, self.cloud_size)
-        return super(ComputeNodeSetupActor, self).create_cloud_node()
-
-
-class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
-    def on_start(self):
-        arv_node = self._arvados_node()
-        if arv_node is None:
-            self._nodename = None
-            return super(ComputeNodeShutdownActor, self).on_start()
-        else:
-            self._set_logger()
-            self._nodename = arv_node['hostname']
-            self._logger.info("Draining SLURM node %s", self._nodename)
-            self._later.issue_slurm_drain()
-
-    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
-    def cancel_shutdown(self, reason, try_resume=True):
-        if self._nodename:
-            if try_resume and self._get_slurm_state(self._nodename) in self.SLURM_DRAIN_STATES:
-                # Resume from "drng" or "drain"
-                self._update_slurm_node(self._nodename, ['State=RESUME'])
-            else:
-                # Node is in a state such as 'idle' or 'alloc' so don't
-                # try to resume it because that will just raise an error.
-                pass
-        return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
-
-    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
-    def issue_slurm_drain(self):
-        if self.cancel_reason is not None:
-            return
-        if self._nodename:
-            self._update_slurm_node(self._nodename, [
-                'State=DRAIN', 'Reason=Node Manager shutdown'])
-            self._logger.info("Waiting for SLURM node %s to drain", self._nodename)
-            self._later.await_slurm_drain()
-        else:
-            self._later.shutdown_node()
-
-    @RetryMixin._retry((subprocess.CalledProcessError, OSError))
-    def await_slurm_drain(self):
-        if self.cancel_reason is not None:
-            return
-        output = self._get_slurm_state(self._nodename)
-        if output in ("drng\n", "alloc\n", "drng*\n", "alloc*\n"):
-            self._timer.schedule(time.time() + 10,
-                                 self._later.await_slurm_drain)
-        elif output in ("idle\n",):
-            # Not in "drng" but idle, don't shut down
-            self.cancel_shutdown("slurm state is %s" % output.strip(), try_resume=False)
-        else:
-            # Any other state: proceed with shutdown.
-            self._later.shutdown_node()
-
-    def _destroy_node(self):
-        if self._nodename:
-            self._update_slurm_node(self._nodename, [
-                'State=DOWN', 'Reason=Node Manager shutdown'])
-        super(ComputeNodeShutdownActor, self)._destroy_node()
-
-
-class ComputeNodeUpdateActor(SlurmMixin, UpdateActorBase):
-    def sync_node(self, cloud_node, arvados_node):
-        """Keep SLURM's node properties up to date."""
-        hostname = arvados_node.get("hostname")
-        features = arvados_node.get("slurm_node_features", "").split(",")
-        sizefeature = "instancetype=" + cloud_node.size.id
-        if hostname and sizefeature not in features:
-            # This probably means SLURM has restarted and lost our
-            # dynamically configured node weights and features.
-            self._update_slurm_size_attrs(hostname, cloud_node.size)
-        return super(ComputeNodeUpdateActor, self).sync_node(
-            cloud_node, arvados_node)
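For readers untangling the removed dispatcher: the drain/await cycle above boils down to two SLURM commands, scontrol update to set the node state and sinfo to poll it. A minimal standalone sketch of that loop (illustrative only, not the removed module's API; assumes scontrol and sinfo are on PATH):

    import subprocess
    import time

    def drain_and_wait(nodename, reason='maintenance', poll=10):
        # Stop SLURM from scheduling new work on the node.
        subprocess.check_output(
            ['scontrol', 'update', 'NodeName=' + nodename,
             'State=DRAIN', 'Reason=' + reason])
        # Poll until the node leaves the transitional "drng"/"alloc" states.
        while True:
            state = subprocess.check_output(
                ['sinfo', '--noheader', '-o', '%t', '-n', nodename]).strip()
            if state not in (b'drng', b'drng*', b'alloc', b'alloc*'):
                return state
            time.sleep(poll)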
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/transitions.py b/services/nodemanager/arvnodeman/computenode/dispatch/transitions.py
deleted file mode 100644 (file)
index 93f50c1..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-transitions = {
- ('busy', 'closed', 'boot exceeded', 'idle exceeded'): None,
- ('busy', 'closed', 'boot exceeded', 'idle wait'): None,
- ('busy', 'closed', 'boot exceeded', 'not idle'): None,
- ('busy', 'closed', 'boot wait', 'idle exceeded'): None,
- ('busy', 'closed', 'boot wait', 'idle wait'): None,
- ('busy', 'closed', 'boot wait', 'not idle'): None,
- ('busy', 'open', 'boot exceeded', 'idle exceeded'): None,
- ('busy', 'open', 'boot exceeded', 'idle wait'): None,
- ('busy', 'open', 'boot exceeded', 'not idle'): None,
- ('busy', 'open', 'boot wait', 'idle exceeded'): None,
- ('busy', 'open', 'boot wait', 'idle wait'): None,
- ('busy', 'open', 'boot wait', 'not idle'): None,
-
- ('down', 'closed', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
- ('down', 'closed', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
- ('down', 'closed', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
- ('down', 'closed', 'boot wait', 'idle exceeded'): None,
- ('down', 'closed', 'boot wait', 'idle wait'): None,
- ('down', 'closed', 'boot wait', 'not idle'): None,
- ('down', 'open', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
- ('down', 'open', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
- ('down', 'open', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
- ('down', 'open', 'boot wait', 'idle exceeded'): "START_SHUTDOWN",
- ('down', 'open', 'boot wait', 'idle wait'): "START_SHUTDOWN",
- ('down', 'open', 'boot wait', 'not idle'): "START_SHUTDOWN",
-
- ('idle', 'closed', 'boot exceeded', 'idle exceeded'): None,
- ('idle', 'closed', 'boot exceeded', 'idle wait'): None,
- ('idle', 'closed', 'boot exceeded', 'not idle'): None,
- ('idle', 'closed', 'boot wait', 'idle exceeded'): None,
- ('idle', 'closed', 'boot wait', 'idle wait'): None,
- ('idle', 'closed', 'boot wait', 'not idle'): None,
- ('idle', 'open', 'boot exceeded', 'idle exceeded'): "START_DRAIN",
- ('idle', 'open', 'boot exceeded', 'idle wait'): None,
- ('idle', 'open', 'boot exceeded', 'not idle'): None,
- ('idle', 'open', 'boot wait', 'idle exceeded'): "START_DRAIN",
- ('idle', 'open', 'boot wait', 'idle wait'): None,
- ('idle', 'open', 'boot wait', 'not idle'): None,
-
- ('unpaired', 'closed', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
- ('unpaired', 'closed', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
- ('unpaired', 'closed', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
- ('unpaired', 'closed', 'boot wait', 'idle exceeded'): None,
- ('unpaired', 'closed', 'boot wait', 'idle wait'): None,
- ('unpaired', 'closed', 'boot wait', 'not idle'): None,
- ('unpaired', 'open', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
- ('unpaired', 'open', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
- ('unpaired', 'open', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
- ('unpaired', 'open', 'boot wait', 'idle exceeded'): None,
- ('unpaired', 'open', 'boot wait', 'idle wait'): None,
- ('unpaired', 'open', 'boot wait', 'not idle'): None,
-
- ('fail', 'closed', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
- ('fail', 'closed', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
- ('fail', 'closed', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
- ('fail', 'closed', 'boot wait', 'idle exceeded'): "START_SHUTDOWN",
- ('fail', 'closed', 'boot wait', 'idle wait'): "START_SHUTDOWN",
- ('fail', 'closed', 'boot wait', 'not idle'): "START_SHUTDOWN",
- ('fail', 'open', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
- ('fail', 'open', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
- ('fail', 'open', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
- ('fail', 'open', 'boot wait', 'idle exceeded'): "START_SHUTDOWN",
- ('fail', 'open', 'boot wait', 'idle wait'): "START_SHUTDOWN",
- ('fail', 'open', 'boot wait', 'not idle'): "START_SHUTDOWN"}
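Each key in this table is a 4-tuple of (node state, shutdown-window open/closed, boot grace period, idle grace period), and the value is the action for the monitor actor to take, with None meaning leave the node alone. Roughly, the consumer side of the table looks like:

    def next_action(state, window, boot_grace, idle_grace):
        # Returns "START_DRAIN", "START_SHUTDOWN", or None.
        return transitions[(state, window, boot_grace, idle_grace)]

    # An idle node whose shutdown window is open and whose idle grace
    # period has run out gets drained:
    assert next_action('idle', 'open', 'boot wait', 'idle exceeded') == "START_DRAIN"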
diff --git a/services/nodemanager/arvnodeman/computenode/driver/__init__.py b/services/nodemanager/arvnodeman/computenode/driver/__init__.py
deleted file mode 100644 (file)
index 48d19f5..0000000
+++ /dev/null
@@ -1,253 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import logging
-from operator import attrgetter
-
-import libcloud.common.types as cloud_types
-from libcloud.compute.base import NodeDriver, NodeAuthSSHKey
-
-from ...config import CLOUD_ERRORS
-from ...status import tracker
-from .. import RetryMixin
-
-class BaseComputeNodeDriver(RetryMixin):
-    """Abstract base class for compute node drivers.
-
-    libcloud drivers abstract away many of the differences between
-    cloud providers, but managing compute nodes requires some
-    cloud-specific features (e.g., keeping track of node FQDNs and
-    boot times).  Compute node drivers are responsible for translating
-    the node manager's cloud requests to a specific cloud's
-    vocabulary.
-
-    Subclasses must implement arvados_create_kwargs, sync_node,
-    node_fqdn, and node_start_time.
-    """
-
-
-    @RetryMixin._retry()
-    def _create_driver(self, driver_class, **auth_kwargs):
-        return driver_class(**auth_kwargs)
-
-    @RetryMixin._retry()
-    def sizes(self):
-        if self._sizes is None:
-            self._sizes = {sz.id: sz for sz in self.real.list_sizes()}
-        return self._sizes
-
-    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
-                 driver_class, retry_wait=1, max_retry_wait=180):
-        """Base initializer for compute node drivers.
-
-        Arguments:
-        * auth_kwargs: A dictionary of arguments that are passed into the
-          driver_class constructor to instantiate a libcloud driver.
-        * list_kwargs: A dictionary of arguments that are passed to the
-          libcloud driver's list_nodes method to return the list of compute
-          nodes.
-        * create_kwargs: A dictionary of arguments that are passed to the
-          libcloud driver's create_node method to create a new compute node.
-        * driver_class: The class of a libcloud driver to use.
-        """
-
-        super(BaseComputeNodeDriver, self).__init__(retry_wait, max_retry_wait,
-                                         logging.getLogger(self.__class__.__name__),
-                                         type(self),
-                                         None)
-        self.real = self._create_driver(driver_class, **auth_kwargs)
-        self.list_kwargs = list_kwargs
-        self.create_kwargs = create_kwargs
-        # Transform entries in create_kwargs.  For each key K, if this class
-        # has an _init_K method, remove the entry and call _init_K with the
-        # corresponding value.  If _init_K returns None, the entry stays out
-        # of the dictionary (we expect we're holding the value somewhere
-        # else, like an instance variable).  Otherwise, _init_K returns a
-        # key-value tuple pair, and we add that entry to create_kwargs.
-        for key in self.create_kwargs.keys():
-            init_method = getattr(self, '_init_' + key, None)
-            if init_method is not None:
-                new_pair = init_method(self.create_kwargs.pop(key))
-                if new_pair is not None:
-                    self.create_kwargs[new_pair[0]] = new_pair[1]
-
-        self._sizes = None
-
-    def _init_ping_host(self, ping_host):
-        self.ping_host = ping_host
-
-    def _init_ssh_key(self, filename):
-        with open(filename) as ssh_file:
-            key = NodeAuthSSHKey(ssh_file.read())
-        return 'auth', key
-
-    def search_for_now(self, term, list_method, key=attrgetter('id'), **kwargs):
-        """Return one matching item from a list of cloud objects.
-
-        Raises ValueError if the number of matching objects is not exactly 1.
-
-        Arguments:
-        * term: The value that identifies a matching item.
-        * list_method: A string that names the method to call for a
-          list of objects.
-        * key: A function that accepts a cloud object and returns the
-          value to compare against `term` for each item.  Defaults to
-          the object's 'id' attribute.
-        """
-        try:
-            list_func = getattr(self, list_method)
-        except AttributeError:
-            list_func = getattr(self.real, list_method)
-        items = list_func(**kwargs)
-        results = [item for item in items if key(item) == term]
-        count = len(results)
-        if count != 1:
-            raise ValueError("{} returned {} results for {!r}".format(
-                    list_method, count, term))
-        return results[0]
-
-    def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
-        """Return one cached matching item from a list of cloud objects.
-
-        See search_for_now() for details of arguments and exceptions.
-        This method caches results, so it's good for finding static cloud
-        objects like node sizes, regions, etc.
-        """
-        cache_key = (list_method, term)
-        if cache_key not in self.SEARCH_CACHE:
-            self.SEARCH_CACHE[cache_key] = self.search_for_now(
-                term, list_method, key, **kwargs)
-        return self.SEARCH_CACHE[cache_key]
-
-    def list_nodes(self, **kwargs):
-        l = self.list_kwargs.copy()
-        l.update(kwargs)
-        try:
-            return self.real.list_nodes(**l)
-        except CLOUD_ERRORS:
-            tracker.counter_add('list_nodes_errors')
-            raise
-
-    def create_cloud_name(self, arvados_node):
-        """Return a cloud node name for the given Arvados node record.
-
-        Subclasses must override this method.  It should return a string
-        that can be used as the name for a newly-created cloud node,
-        based on identifying information in the Arvados node record.
-
-        Arguments:
-        * arvados_node: The Arvados node record to seed the new cloud node.
-        """
-        raise NotImplementedError("BaseComputeNodeDriver.create_cloud_name")
-
-    def arvados_create_kwargs(self, size, arvados_node):
-        """Return dynamic keyword arguments for create_node.
-
-        Subclasses must override this method.  It should return a dictionary
-        of keyword arguments to pass to the libcloud driver's create_node
-        method.  These arguments will extend the static arguments in
-        create_kwargs.
-
-        Arguments:
-        * size: The node size that will be created (libcloud NodeSize object)
-        * arvados_node: The Arvados node record that will be associated
-          with this cloud node, as returned from the API server.
-        """
-        raise NotImplementedError("BaseComputeNodeDriver.arvados_create_kwargs")
-
-    def broken(self, cloud_node):
-        """Return true if libcloud has indicated the node is in a "broken" state."""
-        return False
-
-    def _make_ping_url(self, arvados_node):
-        return 'https://{}/arvados/v1/nodes/{}/ping?ping_secret={}'.format(
-            self.ping_host, arvados_node['uuid'],
-            arvados_node['info']['ping_secret'])
-
-    @staticmethod
-    def _name_key(cloud_object):
-        return cloud_object.name
-
-    def create_node(self, size, arvados_node):
-        try:
-            kwargs = self.create_kwargs.copy()
-            kwargs.update(self.arvados_create_kwargs(size, arvados_node))
-            kwargs['size'] = size.real
-            return self.real.create_node(**kwargs)
-        except CLOUD_ERRORS as create_error:
-            # Workaround for bug #6702: sometimes the create node request
-            # succeeds but times out and raises an exception instead of
-            # returning a result.  If this happens, we get stuck in a retry
-            # loop forever because subsequent create_node attempts will fail
-            # due to node name collision.  So check if the node we intended to
-            # create shows up in the cloud node list and return it if found.
-            try:
-                return self.search_for_now(kwargs['name'], 'list_nodes', self._name_key)
-            except ValueError:
-                tracker.counter_add('create_node_errors')
-                raise create_error
-
-    def post_create_node(self, cloud_node):
-        # ComputeNodeSetupActor calls this method after the cloud node is
-        # created.  Any setup tasks that need to happen afterward (e.g.,
-        # tagging) should be done in this method.
-        pass
-
-    def sync_node(self, cloud_node, arvados_node):
-        # When a compute node first pings the API server, the API server
-        # will automatically assign some attributes on the corresponding
-        # node record, like hostname.  This method should propagate that
-        # information back to the cloud node appropriately.
-        raise NotImplementedError("BaseComputeNodeDriver.sync_node")
-
-    @classmethod
-    def node_fqdn(cls, node):
-        # This method should return the FQDN of the node object argument.
-        # Different clouds store this in different places.
-        raise NotImplementedError("BaseComputeNodeDriver.node_fqdn")
-
-    @classmethod
-    def node_start_time(cls, node):
-        # This method should return the time the node was started, in
-        # seconds since the epoch UTC.
-        raise NotImplementedError("BaseComputeNodeDriver.node_start_time")
-
-    def destroy_node(self, cloud_node):
-        try:
-            return self.real.destroy_node(cloud_node)
-        except CLOUD_ERRORS:
-            # Sometimes the destroy node request succeeds but times out and
-            # raises an exception instead of returning success.  If this
-            # happens, we get a noisy stack trace.  Check if the node is still
-            # on the node list.  If it is gone, we can declare victory.
-            try:
-                self.search_for_now(cloud_node.id, 'list_nodes')
-            except ValueError:
-                # If we catch ValueError, that means search_for_now didn't find
-                # it, which means destroy_node actually succeeded.
-                return True
-            # The node is still on the list.  Re-raise.
-            tracker.counter_add('destroy_node_errors')
-            raise
-
-    # Now that we've defined all our own methods, delegate generic, public
-    # attributes of libcloud drivers that we haven't defined ourselves.
-    def _delegate_to_real(attr_name):
-        return property(
-            lambda self: getattr(self.real, attr_name),
-            lambda self, value: setattr(self.real, attr_name, value),
-            doc=getattr(getattr(NodeDriver, attr_name), '__doc__', None))
-
-    @classmethod
-    def node_id(cls, node):
-        # This method should return the cloud node's unique identifier,
-        # mirroring the node_fqdn/node_start_time stubs above.
-        raise NotImplementedError("BaseComputeNodeDriver.node_id")
-
-    _locals = locals()
-    for _attr_name in dir(NodeDriver):
-        if (not _attr_name.startswith('_')) and (_attr_name not in _locals):
-            _locals[_attr_name] = _delegate_to_real(_attr_name)
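The create_kwargs rewriting described in the initializer's comment is the subtlest part of this base class; the same pattern, reduced to a self-contained sketch (the _init_* methods here are illustrative):

    class Example(object):
        def __init__(self, create_kwargs):
            self.create_kwargs = dict(create_kwargs)
            for key in list(self.create_kwargs):
                init_method = getattr(self, '_init_' + key, None)
                if init_method is not None:
                    new_pair = init_method(self.create_kwargs.pop(key))
                    if new_pair is not None:
                        self.create_kwargs[new_pair[0]] = new_pair[1]

        def _init_ping_host(self, value):
            # Returning None keeps the entry out of create_kwargs; the
            # value is held on the instance instead.
            self.ping_host = value

        def _init_ssh_key(self, filename):
            # Returning a (key, value) pair rewrites the entry.
            with open(filename) as f:
                return 'auth', f.read()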
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
deleted file mode 100644 (file)
index 35c8b5a..0000000
+++ /dev/null
@@ -1,112 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import pipes
-import time
-
-import libcloud.compute.base as cloud_base
-import libcloud.compute.providers as cloud_provider
-import libcloud.compute.types as cloud_types
-from libcloud.common.exceptions import BaseHTTPError
-
-from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
-
-class ComputeNodeDriver(BaseComputeNodeDriver):
-
-    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
-    SEARCH_CACHE = {}
-
-    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
-                 driver_class=DEFAULT_DRIVER):
-
-        if not list_kwargs.get("ex_resource_group"):
-            raise Exception("Must include ex_resource_group in Cloud List configuration (list_kwargs)")
-
-        create_kwargs["ex_resource_group"] = list_kwargs["ex_resource_group"]
-
-        self.tags = {key[4:]: value
-                     for key, value in create_kwargs.iteritems()
-                     if key.startswith('tag_')}
-        # filter out tags from create_kwargs
-        create_kwargs = {key: value
-                         for key, value in create_kwargs.iteritems()
-                         if not key.startswith('tag_')}
-        super(ComputeNodeDriver, self).__init__(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class)
-
-    def create_cloud_name(self, arvados_node):
-        uuid_parts = arvados_node['uuid'].split('-', 2)
-        return 'compute-{parts[2]}-{parts[0]}'.format(parts=uuid_parts)
-
-    def arvados_create_kwargs(self, size, arvados_node):
-        tags = {
-            # Set up tag indicating the Arvados assigned Cloud Size id.
-            'arvados_node_size': size.id,
-            'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
-            'arv-ping-url': self._make_ping_url(arvados_node)
-        }
-        tags.update(self.tags)
-
-        name = self.create_cloud_name(arvados_node)
-        customdata = """#!/bin/sh
-mkdir -p    /var/tmp/arv-node-data/meta-data
-echo %s > /var/tmp/arv-node-data/arv-ping-url
-echo %s > /var/tmp/arv-node-data/meta-data/instance-id
-echo %s > /var/tmp/arv-node-data/meta-data/instance-type
-""" % (pipes.quote(tags['arv-ping-url']),
-       pipes.quote(name),
-       pipes.quote(size.id))
-
-        return {
-            'name': name,
-            'ex_tags': tags,
-            'ex_customdata': customdata
-        }
-
-    def sync_node(self, cloud_node, arvados_node):
-        try:
-            self.real.ex_create_tags(cloud_node,
-                                     {'hostname': arvados_node_fqdn(arvados_node)})
-            return True
-        except BaseHTTPError:
-            return False
-
-    def _init_image(self, urn):
-        return "image", self.get_image(urn)
-
-    def list_nodes(self):
-        # Azure only supports filtering node lists by resource group.
-        # Do our own filtering based on tag.
-        nodes = [node for node in
-                super(ComputeNodeDriver, self).list_nodes(ex_fetch_nic=False, ex_fetch_power_state=False)
-                if node.extra.get("tags", {}).get("arvados-class") == self.tags["arvados-class"]]
-        for n in nodes:
-            # Need to populate Node.size
-            if not n.size:
-                n.size = self.sizes()[n.extra["properties"]["hardwareProfile"]["vmSize"]]
-            n.extra['arvados_node_size'] = n.extra.get('tags', {}).get('arvados_node_size') or n.size.id
-        return nodes
-
-    def broken(self, cloud_node):
-        """Return true if libcloud has indicated the node is in a "broken" state."""
-        # UNKNOWN means the node state is unrecognized, which in practice
-        # means some failure that the Azure libcloud driver doesn't know
-        # how to interpret.
-        return (cloud_node.state in (cloud_types.NodeState.ERROR, cloud_types.NodeState.UNKNOWN))
-
-    @classmethod
-    def node_fqdn(cls, node):
-        return node.extra["tags"].get("hostname")
-
-    @classmethod
-    def node_start_time(cls, node):
-        return arvados_timestamp(node.extra["tags"].get("booted_at"))
-
-    @classmethod
-    def node_id(cls, node):
-        return node.name
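The Azure and GCE drivers derive cloud node names from the Arvados node UUID identically; with a made-up UUID, create_cloud_name works out to:

    uuid = 'zzzzz-7ekkf-0123456789abcde'   # hypothetical node UUID
    parts = uuid.split('-', 2)             # ['zzzzz', '7ekkf', '0123456789abcde']
    name = 'compute-{parts[2]}-{parts[0]}'.format(parts=parts)
    # -> 'compute-0123456789abcde-zzzzz'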
diff --git a/services/nodemanager/arvnodeman/computenode/driver/dummy.py b/services/nodemanager/arvnodeman/computenode/driver/dummy.py
deleted file mode 100644 (file)
index 14845ac..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import time
-
-import libcloud.compute.providers as cloud_provider
-import libcloud.compute.types as cloud_types
-
-from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn
-
-class ComputeNodeDriver(BaseComputeNodeDriver):
-    """Compute node driver wrapper for libcloud's dummy driver.
-
-    This class provides the glue necessary to run the node manager with a
-    dummy cloud.  It's useful for testing.
-    """
-    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.DUMMY)
-    DEFAULT_REAL = DEFAULT_DRIVER('ComputeNodeDriver')
-    DUMMY_START_TIME = time.time()
-
-    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
-                 driver_class=DEFAULT_DRIVER):
-        super(ComputeNodeDriver, self).__init__(
-            auth_kwargs, list_kwargs, create_kwargs, driver_class)
-        if driver_class is self.DEFAULT_DRIVER:
-            self.real = self.DEFAULT_REAL
-
-    def _ensure_private_ip(self, node):
-        if not node.private_ips:
-            node.private_ips = ['10.10.0.{}'.format(node.id)]
-
-    def arvados_create_kwargs(self, size, arvados_node):
-        return {}
-
-    def list_nodes(self):
-        nodelist = super(ComputeNodeDriver, self).list_nodes()
-        for node in nodelist:
-            self._ensure_private_ip(node)
-            node.size = self.sizes()["1"]
-        return nodelist
-
-    def create_node(self, size, arvados_node):
-        node = super(ComputeNodeDriver, self).create_node(size, arvados_node)
-        self._ensure_private_ip(node)
-        return node
-
-    def sync_node(self, cloud_node, arvados_node):
-        cloud_node.name = arvados_node_fqdn(arvados_node)
-
-    @classmethod
-    def node_fqdn(cls, node):
-        return node.name
-
-    @classmethod
-    def node_start_time(cls, node):
-        return cls.DUMMY_START_TIME
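Exercising this wrapper takes nothing but libcloud itself; a sketch, assuming libcloud's DummyNodeDriver still accepts a single creds argument:

    driver = ComputeNodeDriver({'creds': 'test'}, {}, {})
    for node in driver.list_nodes():
        print(node.name, node.private_ips, node.size.id)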
diff --git a/services/nodemanager/arvnodeman/computenode/driver/ec2.py b/services/nodemanager/arvnodeman/computenode/driver/ec2.py
deleted file mode 100644 (file)
index 418a9f9..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import time
-
-import libcloud.compute.base as cloud_base
-import libcloud.compute.providers as cloud_provider
-import libcloud.compute.types as cloud_types
-from libcloud.compute.drivers import ec2 as cloud_ec2
-
-from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn
-
-### Monkeypatch libcloud to support AWS' new SecurityGroup API.
-# These classes can be removed when libcloud supports specifying
-# security groups with the SecurityGroupId parameter.
-class ANMEC2Connection(cloud_ec2.EC2Connection):
-    def request(self, *args, **kwargs):
-        params = kwargs.get('params')
-        if (params is not None) and (params.get('Action') == 'RunInstances'):
-            for key in params.keys():
-                if key.startswith('SecurityGroup.'):
-                    new_key = key.replace('Group.', 'GroupId.', 1)
-                    params[new_key] = params.pop(key).id
-            kwargs['params'] = params
-        return super(ANMEC2Connection, self).request(*args, **kwargs)
-
-
-class ANMEC2NodeDriver(cloud_ec2.EC2NodeDriver):
-    connectionCls = ANMEC2Connection
-
-
-class ComputeNodeDriver(BaseComputeNodeDriver):
-    """Compute node driver wrapper for EC2.
-
-    This translates cloud driver requests to EC2's specific parameters.
-    """
-    DEFAULT_DRIVER = ANMEC2NodeDriver
-### End monkeypatch
-    SEARCH_CACHE = {}
-
-    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
-                 driver_class=DEFAULT_DRIVER):
-        # We need full lists of keys up front because these loops modify
-        # dictionaries in-place.
-        for key in list_kwargs.keys():
-            list_kwargs[key.replace('_', ':')] = list_kwargs.pop(key)
-        self.tags = {key[4:]: value
-                     for key, value in list_kwargs.iteritems()
-                     if key.startswith('tag:')}
-        # Tags are assigned at instance creation time
-        create_kwargs.setdefault('ex_metadata', {})
-        create_kwargs['ex_metadata'].update(self.tags)
-        super(ComputeNodeDriver, self).__init__(
-            auth_kwargs, {'ex_filters': list_kwargs}, create_kwargs,
-            driver_class)
-
-    def _init_image_id(self, image_id):
-        return 'image', self.search_for(image_id, 'list_images', ex_owner='self')
-
-    def _init_security_groups(self, group_names):
-        return 'ex_security_groups', [
-            self.search_for(gname.strip(), 'ex_get_security_groups')
-            for gname in group_names.split(',')]
-
-    def _init_subnet_id(self, subnet_id):
-        return 'ex_subnet', self.search_for(subnet_id, 'ex_list_subnets')
-
-    create_cloud_name = staticmethod(arvados_node_fqdn)
-
-    def arvados_create_kwargs(self, size, arvados_node):
-        kw = {'name': self.create_cloud_name(arvados_node),
-                'ex_userdata': self._make_ping_url(arvados_node)}
-        # libcloud/ec2 disk sizes are in GB, Arvados/SLURM "scratch" value is in MB
-        scratch = int(size.scratch / 1000) + 1
-        if scratch > size.disk:
-            volsize = scratch - size.disk
-            if volsize > 16384:
-                # Must be 1-16384 for General Purpose SSD (gp2) devices
-                # https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_EbsBlockDevice.html
-                self._logger.warning("Requested EBS volume size %d is too large, capping size request to 16384 GB", volsize)
-                volsize = 16384
-            kw["ex_blockdevicemappings"] = [{
-                "DeviceName": "/dev/xvdt",
-                "Ebs": {
-                    "DeleteOnTermination": True,
-                    "VolumeSize": volsize,
-                    "VolumeType": "gp2"
-                }}]
-        if size.preemptible:
-            # Request a Spot instance for this node
-            kw['ex_spot_market'] = True
-        return kw
-
-    def sync_node(self, cloud_node, arvados_node):
-        self.real.ex_create_tags(cloud_node,
-                                 {'Name': arvados_node_fqdn(arvados_node)})
-
-    def create_node(self, size, arvados_node):
-        # Set up tag indicating the Arvados assigned Cloud Size id.
-        self.create_kwargs['ex_metadata'].update({'arvados_node_size': size.id})
-        return super(ComputeNodeDriver, self).create_node(size, arvados_node)
-
-    def list_nodes(self):
-        # Need to populate Node.size
-        nodes = super(ComputeNodeDriver, self).list_nodes()
-        for n in nodes:
-            if not n.size:
-                n.size = self.sizes()[n.extra["instance_type"]]
-            n.extra['arvados_node_size'] = n.extra.get('tags', {}).get('arvados_node_size') or n.size.id
-        return nodes
-
-    @classmethod
-    def node_fqdn(cls, node):
-        return node.name
-
-    @classmethod
-    def node_start_time(cls, node):
-        time_str = node.extra['launch_time'].split('.', 2)[0] + 'UTC'
-        return time.mktime(time.strptime(
-                time_str,'%Y-%m-%dT%H:%M:%S%Z')) - time.timezone
-
-    @classmethod
-    def node_id(cls, node):
-        return node.id
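The EBS sizing above converts the Arvados/SLURM scratch value from MB to GB, subtracts what the instance type already provides, and caps the request at the gp2 maximum; the arithmetic in isolation, with made-up numbers:

    def ebs_volume_gb(scratch_mb, instance_disk_gb):
        scratch = int(scratch_mb / 1000) + 1            # MB -> GB, rounded up
        if scratch <= instance_disk_gb:
            return 0                                    # built-in disk suffices
        return min(scratch - instance_disk_gb, 16384)   # gp2 cap

    assert ebs_volume_gb(200000, 32) == 169     # needs 201 GB, 32 GB on instance
    assert ebs_volume_gb(20000000, 0) == 16384  # request capped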
diff --git a/services/nodemanager/arvnodeman/computenode/driver/gce.py b/services/nodemanager/arvnodeman/computenode/driver/gce.py
deleted file mode 100644 (file)
index 23a1017..0000000
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import functools
-import json
-import time
-
-import libcloud.compute.providers as cloud_provider
-import libcloud.compute.types as cloud_types
-
-from . import BaseComputeNodeDriver
-from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
-
-class ComputeNodeDriver(BaseComputeNodeDriver):
-    """Compute node driver wrapper for GCE
-
-    This translates cloud driver requests to GCE's specific parameters.
-    """
-    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.GCE)
-    SEARCH_CACHE = {}
-
-    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
-                 driver_class=DEFAULT_DRIVER):
-        list_kwargs = list_kwargs.copy()
-        tags_str = list_kwargs.pop('tags', '')
-        if not tags_str.strip():
-            self.node_tags = frozenset()
-        else:
-            self.node_tags = frozenset(t.strip() for t in tags_str.split(','))
-        create_kwargs = create_kwargs.copy()
-        create_kwargs.setdefault('external_ip', None)
-        create_kwargs.setdefault('ex_metadata', {})
-        self._project = auth_kwargs.get("project")
-        super(ComputeNodeDriver, self).__init__(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class)
-        self._disktype_links = {dt.name: self._object_link(dt)
-                                for dt in self.real.ex_list_disktypes()}
-
-    @staticmethod
-    def _object_link(cloud_object):
-        return cloud_object.extra.get('selfLink')
-
-    def _init_image(self, image_name):
-        return 'image', self.search_for(
-            image_name, 'list_images', self._name_key, ex_project=self._project)
-
-    def _init_network(self, network_name):
-        return 'ex_network', self.search_for(
-            network_name, 'ex_list_networks', self._name_key)
-
-    def _init_service_accounts(self, service_accounts_str):
-        return 'ex_service_accounts', json.loads(service_accounts_str)
-
-    def _init_ssh_key(self, filename):
-        # SSH keys are delivered to GCE nodes via ex_metadata: see
-        # http://stackoverflow.com/questions/26752617/creating-sshkeys-for-gce-instance-using-libcloud
-        with open(filename) as ssh_file:
-            self.create_kwargs['ex_metadata']['sshKeys'] = (
-                'root:' + ssh_file.read().strip())
-
-    def create_cloud_name(self, arvados_node):
-        uuid_parts = arvados_node['uuid'].split('-', 2)
-        return 'compute-{parts[2]}-{parts[0]}'.format(parts=uuid_parts)
-
-    def arvados_create_kwargs(self, size, arvados_node):
-        name = self.create_cloud_name(arvados_node)
-
-        if size.scratch > 375000:
-            self._logger.warning("Requested %d MB scratch space, but GCE driver currently only supports attaching a single 375 GB disk.", size.scratch)
-
-        disks = [
-            {'autoDelete': True,
-             'boot': True,
-             'deviceName': name,
-             'initializeParams':
-                 {'diskName': name,
-                  'diskType': self._disktype_links['pd-standard'],
-                  'sourceImage': self._object_link(self.create_kwargs['image']),
-                  },
-             'type': 'PERSISTENT',
-             },
-            {'autoDelete': True,
-             'boot': False,
-             # Boot images rely on this device name to find the SSD.
-             # Any change must be coordinated in the image.
-             'deviceName': 'tmp',
-             'initializeParams':
-                 {'diskType': self._disktype_links['local-ssd'],
-                  },
-             'type': 'SCRATCH',
-             },
-            ]
-        result = {'name': name,
-                  'ex_metadata': self.create_kwargs['ex_metadata'].copy(),
-                  'ex_tags': list(self.node_tags),
-                  'ex_disks_gce_struct': disks,
-                  }
-        result['ex_metadata'].update({
-            'arvados_node_size': size.id,
-            'arv-ping-url': self._make_ping_url(arvados_node),
-            'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
-            'hostname': arvados_node_fqdn(arvados_node),
-        })
-        return result
-
-    def list_nodes(self):
-        # The GCE libcloud driver only supports filtering node lists by zone.
-        # Do our own filtering based on tag list.
-        nodelist = [node for node in
-                    super(ComputeNodeDriver, self).list_nodes()
-                    if self.node_tags.issubset(node.extra.get('tags', []))]
-        for node in nodelist:
-            # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
-            # It's supposed to be the actual size object.  Check that it's not,
-            # and monkeypatch the results when that's the case.
-            if not hasattr(node.size, 'id'):
-                node.size = self.sizes()[node.size]
-            # Get arvados-assigned cloud size id
-            node.extra['arvados_node_size'] = node.extra.get('metadata', {}).get('arvados_node_size') or node.size.id
-        return nodelist
-
-    @classmethod
-    def _find_metadata(cls, metadata_items, key):
-        # Given a list of two-item metadata dictionaries, return the one with
-        # the named key.  Raise KeyError if not found.
-        try:
-            return next(data_dict for data_dict in metadata_items
-                        if data_dict.get('key') == key)
-        except StopIteration:
-            raise KeyError(key)
-
-    @classmethod
-    def _get_metadata(cls, metadata_items, key, *default):
-        try:
-            return cls._find_metadata(metadata_items, key)['value']
-        except KeyError:
-            if default:
-                return default[0]
-            raise
-
-    def sync_node(self, cloud_node, arvados_node):
-        # Update the cloud node record to ensure we have the correct metadata
-        # fingerprint.
-        cloud_node = self.real.ex_get_node(cloud_node.name, cloud_node.extra['zone'])
-
-        # We can't store the FQDN on the name attribute or anything like it,
-        # because (a) names are static throughout the node's life (so FQDN
-        # isn't available because we don't know it at node creation time) and
-        # (b) it can't contain dots.  Instead stash it in metadata.
-        hostname = arvados_node_fqdn(arvados_node)
-        metadata_req = cloud_node.extra['metadata'].copy()
-        metadata_items = metadata_req.setdefault('items', [])
-        try:
-            self._find_metadata(metadata_items, 'hostname')['value'] = hostname
-        except KeyError:
-            metadata_items.append({'key': 'hostname', 'value': hostname})
-
-        self.real.ex_set_node_metadata(cloud_node, metadata_items)
-
-    @classmethod
-    def node_fqdn(cls, node):
-        # See sync_node comment.
-        return cls._get_metadata(node.extra['metadata'].get('items', []),
-                                 'hostname', '')
-
-    @classmethod
-    def node_start_time(cls, node):
-        try:
-            return arvados_timestamp(cls._get_metadata(
-                    node.extra['metadata']['items'], 'booted_at'))
-        except KeyError:
-            return 0
-
-    @classmethod
-    def node_id(cls, node):
-        return node.id
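GCE hands back instance metadata as a list of {'key': ..., 'value': ...} dicts rather than a flat mapping, which is what forces the _find_metadata/_get_metadata helpers above; the same lookup in isolation:

    items = [{'key': 'hostname', 'value': 'compute0.example.org'},
             {'key': 'booted_at', 'value': '2020-08-25T18:45:29Z'}]

    def get_metadata(items, key, default=None):
        # First match wins, mirroring the next()-based helper above.
        for d in items:
            if d.get('key') == key:
                return d['value']
        return default

    assert get_metadata(items, 'hostname') == 'compute0.example.org'
    assert get_metadata(items, 'missing', '') == ''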
diff --git a/services/nodemanager/arvnodeman/config.py b/services/nodemanager/arvnodeman/config.py
deleted file mode 100644 (file)
index 4857e89..0000000
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import ConfigParser
-import importlib
-import logging
-import sys
-
-import arvados
-import httplib2
-import pykka
-from apiclient import errors as apierror
-
-from .baseactor import BaseNodeManagerActor
-
-from functools import partial
-from libcloud.common.types import LibcloudError
-from libcloud.common.exceptions import BaseHTTPError
-
-# IOError is the base class for socket.error, ssl.SSLError, and friends.
-# It seems like it hits the sweet spot for operations we want to retry:
-# it's low-level, but unlikely to catch code bugs.
-NETWORK_ERRORS = (IOError,)
-ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
-CLOUD_ERRORS = NETWORK_ERRORS + (LibcloudError, BaseHTTPError)
-
-actor_class = BaseNodeManagerActor
-
-class NodeManagerConfig(ConfigParser.SafeConfigParser):
-    """Node Manager Configuration class.
-
-    This is a standard Python ConfigParser, with additional helper methods to
-    create objects instantiated with configuration information.
-    """
-
-    LOGGING_NONLEVELS = frozenset(['file'])
-
-    def __init__(self, *args, **kwargs):
-        # Can't use super() because SafeConfigParser is an old-style class.
-        ConfigParser.SafeConfigParser.__init__(self, *args, **kwargs)
-        for sec_name, settings in {
-            'Arvados': {'insecure': 'no',
-                        'timeout': '15',
-                        'jobs_queue': 'yes',
-                        'slurm_queue': 'yes'
-                    },
-            'Daemon': {'min_nodes': '0',
-                       'max_nodes': '1',
-                       'poll_time': '60',
-                       'cloudlist_poll_time': '0',
-                       'nodelist_poll_time': '0',
-                       'wishlist_poll_time': '0',
-                       'max_poll_time': '300',
-                       'poll_stale_after': '600',
-                       'max_total_price': '0',
-                       'boot_fail_after': str(sys.maxint),
-                       'node_stale_after': str(60 * 60 * 2),
-                       'watchdog': '600',
-                       'node_mem_scaling': '0.95',
-                       'consecutive_idle_count': '2'},
-            'Manage': {'address': '127.0.0.1',
-                       'port': '-1',
-                       'ManagementToken': ''},
-            'Logging': {'file': '/dev/stderr',
-                        'level': 'WARNING'}
-        }.iteritems():
-            if not self.has_section(sec_name):
-                self.add_section(sec_name)
-            for opt_name, value in settings.iteritems():
-                if not self.has_option(sec_name, opt_name):
-                    self.set(sec_name, opt_name, value)
-
-    def get_section(self, section, transformers={}, default_transformer=None):
-        transformer_map = {
-            str: self.get,
-            int: self.getint,
-            bool: self.getboolean,
-            float: self.getfloat,
-        }
-        result = self._dict()
-        for key, value in self.items(section):
-            transformer = None
-            if transformers.get(key) in transformer_map:
-                transformer = partial(transformer_map[transformers[key]], section)
-            elif default_transformer in transformer_map:
-                transformer = partial(transformer_map[default_transformer], section)
-            if transformer is not None:
-                try:
-                    value = transformer(key)
-                except (TypeError, ValueError):
-                    pass
-            result[key] = value
-        return result
-
-    def log_levels(self):
-        return {key: getattr(logging, self.get('Logging', key).upper())
-                for key in self.options('Logging')
-                if key not in self.LOGGING_NONLEVELS}
-
-    def dispatch_classes(self):
-        mod_name = 'arvnodeman.computenode.dispatch'
-        if self.has_option('Daemon', 'dispatcher'):
-            mod_name = '{}.{}'.format(mod_name,
-                                      self.get('Daemon', 'dispatcher'))
-        module = importlib.import_module(mod_name)
-        return (module.ComputeNodeSetupActor,
-                module.ComputeNodeShutdownActor,
-                module.ComputeNodeUpdateActor,
-                module.ComputeNodeMonitorActor)
-
-    def new_arvados_client(self):
-        if self.has_option('Daemon', 'certs_file'):
-            certs_file = self.get('Daemon', 'certs_file')
-        else:
-            certs_file = None
-        insecure = self.getboolean('Arvados', 'insecure')
-        http = httplib2.Http(timeout=self.getint('Arvados', 'timeout'),
-                             ca_certs=certs_file,
-                             disable_ssl_certificate_validation=insecure)
-        return arvados.api(version='v1',
-                           host=self.get('Arvados', 'host'),
-                           token=self.get('Arvados', 'token'),
-                           insecure=insecure,
-                           http=http)
-
-    def new_cloud_client(self):
-        module = importlib.import_module('arvnodeman.computenode.driver.' +
-                                         self.get('Cloud', 'provider'))
-        driver_class = module.ComputeNodeDriver.DEFAULT_DRIVER
-        if self.has_option('Cloud', 'driver_class'):
-            d = self.get('Cloud', 'driver_class').split('.')
-            mod = '.'.join(d[:-1])
-            cls = d[-1]
-            driver_class = importlib.import_module(mod).__dict__[cls]
-        auth_kwargs = self.get_section('Cloud Credentials')
-        if 'timeout' in auth_kwargs:
-            auth_kwargs['timeout'] = int(auth_kwargs['timeout'])
-        return module.ComputeNodeDriver(auth_kwargs,
-                                        self.get_section('Cloud List'),
-                                        self.get_section('Cloud Create'),
-                                        driver_class=driver_class)
-
-    def node_sizes(self):
-        """Finds all acceptable NodeSizes for our installation.
-
-        Returns a list of (NodeSize, kwargs) pairs for each NodeSize object
-        returned by libcloud that matches a size listed in our config file.
-        """
-        all_sizes = self.new_cloud_client().list_sizes()
-        size_kwargs = {}
-        section_types = {
-            'instance_type': str,
-            'price': float,
-            'preemptible': bool,
-        }
-        for sec_name in self.sections():
-            sec_words = sec_name.split(None, 2)
-            if sec_words[0] != 'Size':
-                continue
-            size_spec = self.get_section(sec_name, section_types, int)
-            if 'preemptible' not in size_spec:
-                size_spec['preemptible'] = False
-            if 'instance_type' not in size_spec:
-                # Default the instance type to the Size section name if missing.
-                size_spec['instance_type'] = sec_words[1]
-            size_spec['id'] = sec_words[1]
-            size_kwargs[sec_words[1]] = size_spec
-        # EC2 node sizes are identified by id. GCE sizes are identified by name.
-        matching_sizes = []
-        for size in all_sizes:
-            matching_sizes += [
-                (size, size_kwargs[s]) for s in size_kwargs
-                if size_kwargs[s]['instance_type'] == size.id
-                or size_kwargs[s]['instance_type'] == size.name
-            ]
-        return matching_sizes
-
-    def shutdown_windows(self):
-        return [float(n)
-                for n in self.get('Cloud', 'shutdown_windows').split(',')]
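The transformer_map dispatch in get_section is what turns raw config strings into typed values; detached from ConfigParser, the idea reduces to the following (illustrative keys and values):

    transformer_map = {
        str: lambda v: v,
        int: int,
        float: float,
        bool: lambda v: v.strip().lower() in ('1', 'yes', 'true', 'on'),
    }

    def get_section(raw_items, transformers={}, default_transformer=None):
        result = {}
        for key, value in raw_items.items():
            t = transformers.get(key, default_transformer)
            if t in transformer_map:
                try:
                    value = transformer_map[t](value)
                except (TypeError, ValueError):
                    pass
            result[key] = value
        return result

    size = get_section({'price': '0.193', 'cores': '8', 'preemptible': 'yes'},
                       {'price': float, 'preemptible': bool}, int)
    # -> {'price': 0.193, 'cores': 8, 'preemptible': True}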
diff --git a/services/nodemanager/arvnodeman/daemon.py b/services/nodemanager/arvnodeman/daemon.py
deleted file mode 100644 (file)
index 1edf4dc..0000000
+++ /dev/null
@@ -1,583 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import functools
-import logging
-import time
-
-import pykka
-
-from . import computenode as cnode
-from . import status
-from .computenode import dispatch
-from .config import actor_class
-
-class _ComputeNodeRecord(object):
-    def __init__(self, actor=None, cloud_node=None, arvados_node=None,
-                 assignment_time=float('-inf')):
-        self.actor = actor
-        self.cloud_node = cloud_node
-        self.arvados_node = arvados_node
-        self.assignment_time = assignment_time
-        self.shutdown_actor = None
-
-class _BaseNodeTracker(object):
-    def __init__(self):
-        self.nodes = {}
-        self.orphans = {}
-
-    # Proxy the methods listed below to self.nodes.
-    def _proxy_method(name):
-        method = getattr(dict, name)
-        @functools.wraps(method, ('__name__', '__doc__'))
-        def wrapper(self, *args, **kwargs):
-            return method(self.nodes, *args, **kwargs)
-        return wrapper
-
-    for _method_name in ['__contains__', '__getitem__', '__len__', 'get']:
-        locals()[_method_name] = _proxy_method(_method_name)
-
-    def record_key(self, record):
-        return self.item_key(getattr(record, self.RECORD_ATTR))
-
-    def add(self, record):
-        self.nodes[self.record_key(record)] = record
-
-    def update_record(self, key, item):
-        setattr(self.nodes[key], self.RECORD_ATTR, item)
-
-    def update_from(self, response):
-        unseen = set(self.nodes.iterkeys())
-        for item in response:
-            key = self.item_key(item)
-            if key in unseen:
-                unseen.remove(key)
-                self.update_record(key, item)
-            else:
-                yield key, item
-        self.orphans = {key: self.nodes.pop(key) for key in unseen}
-
-    def unpaired(self):
-        return (record for record in self.nodes.itervalues()
-                if getattr(record, self.PAIR_ATTR) is None)
-
-
-class _CloudNodeTracker(_BaseNodeTracker):
-    RECORD_ATTR = 'cloud_node'
-    PAIR_ATTR = 'arvados_node'
-    item_key = staticmethod(lambda cloud_node: cloud_node.id)
-
-
-class _ArvadosNodeTracker(_BaseNodeTracker):
-    RECORD_ATTR = 'arvados_node'
-    PAIR_ATTR = 'cloud_node'
-    item_key = staticmethod(lambda arvados_node: arvados_node['uuid'])
-
-    def find_stale_node(self, stale_time):
-        # Try to select a stale node record that has an assigned slot first.
-        for record in sorted(self.nodes.itervalues(),
-                             key=lambda r: r.arvados_node['slot_number'],
-                             reverse=True):
-            node = record.arvados_node
-            if (not cnode.timestamp_fresh(cnode.arvados_node_mtime(node),
-                                          stale_time) and
-                  not cnode.timestamp_fresh(record.assignment_time,
-                                            stale_time)):
-                return node
-        return None
-
-
-class NodeManagerDaemonActor(actor_class):
-    """Node Manager daemon.
-
-    This actor subscribes to all information polls about cloud nodes,
-    Arvados nodes, and the job queue.  It creates a ComputeNodeMonitorActor
-    for every cloud node, subscribing them to poll updates
-    appropriately.  It creates and destroys cloud nodes based on job queue
-    demand, and stops the corresponding ComputeNode actors when their work
-    is done.
-    """
-    def __init__(self, server_wishlist_actor, arvados_nodes_actor,
-                 cloud_nodes_actor, cloud_update_actor, timer_actor,
-                 arvados_factory, cloud_factory,
-                 shutdown_windows, server_calculator,
-                 min_nodes, max_nodes,
-                 poll_stale_after=600,
-                 boot_fail_after=1800,
-                 node_stale_after=7200,
-                 node_setup_class=dispatch.ComputeNodeSetupActor,
-                 node_shutdown_class=dispatch.ComputeNodeShutdownActor,
-                 node_actor_class=dispatch.ComputeNodeMonitorActor,
-                 max_total_price=0,
-                 consecutive_idle_count=1):
-        super(NodeManagerDaemonActor, self).__init__()
-        self._node_setup = node_setup_class
-        self._node_shutdown = node_shutdown_class
-        self._node_actor = node_actor_class
-        self._cloud_updater = cloud_update_actor
-        self._timer = timer_actor
-        self._new_arvados = arvados_factory
-        self._new_cloud = cloud_factory
-        self._cloud_driver = self._new_cloud()
-        self._later = self.actor_ref.tell_proxy()
-        self.shutdown_windows = shutdown_windows
-        self.server_calculator = server_calculator
-        self.min_cloud_size = self.server_calculator.cheapest_size()
-        self.min_nodes = min_nodes
-        self.max_nodes = max_nodes
-        self.node_quota = max_nodes
-        self.max_total_price = max_total_price
-        self.poll_stale_after = poll_stale_after
-        self.boot_fail_after = boot_fail_after
-        self.node_stale_after = node_stale_after
-        self.consecutive_idle_count = consecutive_idle_count
-        self.last_polls = {}
-        for poll_name in ['server_wishlist', 'arvados_nodes', 'cloud_nodes']:
-            poll_actor = locals()[poll_name + '_actor']
-            poll_actor.subscribe(getattr(self._later, 'update_' + poll_name))
-            setattr(self, '_{}_actor'.format(poll_name), poll_actor)
-            self.last_polls[poll_name] = -self.poll_stale_after
-        self.cloud_nodes = _CloudNodeTracker()
-        self.arvados_nodes = _ArvadosNodeTracker()
-        self.booting = {}       # Actor IDs to ComputeNodeSetupActors
-        self.sizes_booting = {} # Actor IDs to node size
-
-    def on_start(self):
-        self._logger = logging.getLogger("%s.%s" % (self.__class__.__name__, self.actor_urn[33:]))
-        self._logger.debug("Daemon started")
-
-    def _update_poll_time(self, poll_key):
-        self.last_polls[poll_key] = time.time()
-
-    def _pair_nodes(self, node_record, arvados_node):
-        self._logger.info("Cloud node %s is now paired with Arvados node %s with hostname %s",
-                          node_record.cloud_node.name, arvados_node['uuid'], arvados_node['hostname'])
-        self._arvados_nodes_actor.subscribe_to(
-            arvados_node['uuid'], node_record.actor.update_arvados_node)
-        node_record.arvados_node = arvados_node
-        self.arvados_nodes.add(node_record)
-
-    def _new_node(self, cloud_node):
-        start_time = self._cloud_driver.node_start_time(cloud_node)
-        shutdown_timer = cnode.ShutdownTimer(start_time,
-                                             self.shutdown_windows)
-        actor = self._node_actor.start(
-            cloud_node=cloud_node,
-            cloud_node_start_time=start_time,
-            shutdown_timer=shutdown_timer,
-            update_actor=self._cloud_updater,
-            timer_actor=self._timer,
-            arvados_node=None,
-            poll_stale_after=self.poll_stale_after,
-            node_stale_after=self.node_stale_after,
-            cloud_client=self._cloud_driver,
-            boot_fail_after=self.boot_fail_after,
-            consecutive_idle_count=self.consecutive_idle_count)
-        actorTell = actor.tell_proxy()
-        actorTell.subscribe(self._later.node_can_shutdown)
-        self._cloud_nodes_actor.subscribe_to(cloud_node.id,
-                                             actorTell.update_cloud_node)
-        record = _ComputeNodeRecord(actor.proxy(), cloud_node)
-        return record
-
-    def _register_cloud_node(self, node):
-        rec = self.cloud_nodes.get(node.id)
-        if rec is None:
-            self._logger.info("Registering new cloud node %s", node.id)
-            record = self._new_node(node)
-            self.cloud_nodes.add(record)
-        else:
-            rec.cloud_node = node
-
-    def update_cloud_nodes(self, nodelist):
-        self._update_poll_time('cloud_nodes')
-        for _, node in self.cloud_nodes.update_from(nodelist):
-            self._register_cloud_node(node)
-
-        self.try_pairing()
-
-        for record in self.cloud_nodes.orphans.itervalues():
-            if record.shutdown_actor:
-                try:
-                    record.shutdown_actor.stop()
-                except pykka.ActorDeadError:
-                    pass
-                record.shutdown_actor = None
-
-            # A recently booted node is a node that successfully completed the
-            # setup actor but has not yet appeared in the cloud node list.
-            # This will have the tag _nodemanager_recently_booted on it, which
-            # means (if we're not shutting it down) we want to put it back into
-            # the cloud node list.  Once it really appears in the cloud list,
-            # the object in record.cloud_node will be replaced by a new one
-            # that lacks the "_nodemanager_recently_booted" tag.
-            if hasattr(record.cloud_node, "_nodemanager_recently_booted"):
-                self.cloud_nodes.add(record)
-            else:
-                # Node disappeared from the cloud node list. If it's paired,
-                # remove its idle time counter.
-                if record.arvados_node:
-                    status.tracker.idle_out(record.arvados_node.get('hostname'))
-                # Stop the monitor actor if necessary and forget about the node.
-                if record.actor:
-                    try:
-                        record.actor.stop()
-                    except pykka.ActorDeadError:
-                        pass
-                    record.actor = None
-                record.cloud_node = None
-
-    def _register_arvados_node(self, key, arv_node):
-        self._logger.info("Registering new Arvados node %s", key)
-        record = _ComputeNodeRecord(arvados_node=arv_node)
-        self.arvados_nodes.add(record)
-
-    def update_arvados_nodes(self, nodelist):
-        self._update_poll_time('arvados_nodes')
-        for key, node in self.arvados_nodes.update_from(nodelist):
-            self._register_arvados_node(key, node)
-        self.try_pairing()
-
-    def try_pairing(self):
-        for record in self.cloud_nodes.unpaired():
-            for arv_rec in self.arvados_nodes.unpaired():
-                if record.actor is not None and record.actor.offer_arvados_pair(arv_rec.arvados_node).get():
-                    self._pair_nodes(record, arv_rec.arvados_node)
-                    break
-
-    def _nodes_booting(self, size):
-        s = sum(1
-                for c in self.booting.iterkeys()
-                if size is None or self.sizes_booting[c].id == size.id)
-        return s
-
-    def _node_states(self, size):
-        proxy_states = []
-        states = []
-        for rec in self.cloud_nodes.nodes.itervalues():
-            if size is None or rec.cloud_node.size.id == size.id:
-                if rec.shutdown_actor is None and rec.actor is not None:
-                    proxy_states.append(rec.actor.get_state())
-                else:
-                    states.append("shutdown")
-        return states + pykka.get_all(proxy_states)
-
-    def _update_tracker(self):
-        updates = {
-            k: 0
-            for k in status.tracker.keys()
-            if k.startswith('nodes_')
-        }
-        for s in self._node_states(size=None):
-            updates.setdefault('nodes_'+s, 0)
-            updates['nodes_'+s] += 1
-        updates['nodes_wish'] = len(self.last_wishlist)
-        updates['node_quota'] = self.node_quota
-        status.tracker.update(updates)
-
-    def _state_counts(self, size):
-        states = self._node_states(size)
-        counts = {
-            "booting": self._nodes_booting(size),
-            "unpaired": 0,
-            "busy": 0,
-            "idle": 0,
-            "fail": 0,
-            "down": 0,
-            "shutdown": 0
-        }
-        for s in states:
-            counts[s] = counts[s] + 1
-        return counts
-
-    def _nodes_up(self, counts):
-        up = counts["booting"] + counts["unpaired"] + counts["idle"] + counts["busy"]
-        return up
-
-    def _total_price(self):
-        cost = 0
-        cost += sum(self.sizes_booting[c].price
-                    for c in self.booting.iterkeys())
-        cost += sum(c.cloud_node.size.price
-                    for c in self.cloud_nodes.nodes.itervalues())
-        return cost
-
-    def _size_wishlist(self, size):
-        return sum(1 for c in self.last_wishlist if c.id == size.id)
-
-    def _nodes_wanted(self, size):
-        total_node_count = self._nodes_booting(None) + len(self.cloud_nodes)
-        under_min = self.min_nodes - total_node_count
-        over_max = total_node_count - self.node_quota
-        total_price = self._total_price()
-
-        counts = self._state_counts(size)
-
-        up_count = self._nodes_up(counts)
-        busy_count = counts["busy"]
-        wishlist_count = self._size_wishlist(size)
-
-        self._logger.info("%s: wishlist %i, up %i (booting %i, unpaired %i, idle %i, busy %i), down %i, shutdown %i", size.id,
-                          wishlist_count,
-                          up_count,
-                          counts["booting"],
-                          counts["unpaired"],
-                          counts["idle"],
-                          busy_count,
-                          counts["down"]+counts["fail"],
-                          counts["shutdown"])
-
-        if over_max >= 0:
-            return -over_max
-        elif under_min > 0 and size.id == self.min_cloud_size.id:
-            return under_min
-
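-        # Nodes that are up but not busy can absorb wishlist entries, so only
-        # request the shortfall beyond that spare capacity.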
-        wanted = wishlist_count - (up_count - busy_count)
-        if wanted > 0 and self.max_total_price and ((total_price + (size.price*wanted)) > self.max_total_price):
-            can_boot = int((self.max_total_price - total_price) / size.price)
-            if can_boot == 0:
-                self._logger.info("Not booting %s (price %s) because with it would exceed max_total_price of %s (current total_price is %s)",
-                                  size.id, size.price, self.max_total_price, total_price)
-            return can_boot
-        else:
-            return wanted
-
-    def _nodes_excess(self, size):
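-        # A node is excess if it is up but neither busy nor needed for the
-        # wishlist; nodes reserved to satisfy min_nodes are never excess.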
-        counts = self._state_counts(size)
-        up_count = self._nodes_up(counts)
-        if size.id == self.min_cloud_size.id:
-            up_count -= self.min_nodes
-        return up_count - (counts["busy"] + self._size_wishlist(size))
-
-    def update_server_wishlist(self, wishlist):
-        self._update_poll_time('server_wishlist')
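-        # Cap the wishlist at what the node quota allows, then walk sizes from
-        # most to least expensive, booting or cancelling boots per size.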
-        requestable_nodes = self.node_quota - (self._nodes_booting(None) + len(self.cloud_nodes))
-        self.last_wishlist = wishlist[:requestable_nodes]
-        for size in reversed(self.server_calculator.cloud_sizes):
-            try:
-                nodes_wanted = self._nodes_wanted(size)
-                if nodes_wanted > 0:
-                    self._later.start_node(size)
-                elif (nodes_wanted < 0) and self.booting:
-                    self._later.stop_booting_node(size)
-            except Exception:
-                self._logger.exception("while calculating nodes wanted for size %s", getattr(size, "id", "(id not available)"))
-        try:
-            self._update_tracker()
-        except Exception:
-            self._logger.exception("while updating tracker")
-
-    def _check_poll_freshness(orig_func):
-        """Decorator to inhibit a method when poll information is stale.
-
-        This decorator checks the timestamps of all the poll information the
-        daemon has received.  The decorated method is only called if none
-        of the timestamps are considered stale.
-        """
-        @functools.wraps(orig_func)
-        def wrapper(self, *args, **kwargs):
-            now = time.time()
-            if all(now - t < self.poll_stale_after
-                   for t in self.last_polls.itervalues()):
-                return orig_func(self, *args, **kwargs)
-            else:
-                return None
-        return wrapper
-
-    @_check_poll_freshness
-    def start_node(self, cloud_size):
-        nodes_wanted = self._nodes_wanted(cloud_size)
-        if nodes_wanted < 1:
-            return None
-
-        if not self.cancel_node_shutdown(cloud_size):
-            arvados_node = self.arvados_nodes.find_stale_node(self.node_stale_after)
-            self._logger.info("Want %i more %s nodes.  Booting a node.",
-                              nodes_wanted, cloud_size.id)
-            new_setup = self._node_setup.start(
-                timer_actor=self._timer,
-                arvados_client=self._new_arvados(),
-                arvados_node=arvados_node,
-                cloud_client=self._new_cloud(),
-                cloud_size=self.server_calculator.find_size(cloud_size.id))
-            self.booting[new_setup.actor_urn] = new_setup.proxy()
-            self.sizes_booting[new_setup.actor_urn] = cloud_size
-
-            if arvados_node is not None:
-                self.arvados_nodes[arvados_node['uuid']].assignment_time = (
-                    time.time())
-            new_setup.tell_proxy().subscribe(self._later.node_setup_finished)
-
-        if nodes_wanted > 1:
-            self._later.start_node(cloud_size)
-
-    def _get_actor_attrs(self, actor, *attr_names):
-        return pykka.get_all([getattr(actor, name) for name in attr_names])
-
-    def node_setup_finished(self, setup_proxy):
-        # Called when a SetupActor has completed.
-        cloud_node, arvados_node, error = self._get_actor_attrs(
-            setup_proxy, 'cloud_node', 'arvados_node', 'error')
-        setup_proxy.stop()
-
-        if cloud_node is None:
-            # If cloud_node is None then the node creation wasn't successful.
-            if error == dispatch.QuotaExceeded:
-                # We've hit a quota limit, so adjust node_quota to stop trying to
-                # boot new nodes until the node count goes down.
-                self.node_quota = len(self.cloud_nodes)
-                self._logger.warning("After quota exceeded error setting node quota to %s", self.node_quota)
-        else:
-            # Node creation succeeded.  Update cloud node list.
-            cloud_node._nodemanager_recently_booted = True
-            self._register_cloud_node(cloud_node)
-
-            # Different quota policies may be in force depending on the cloud
-            # provider, account limits, and the specific mix of node sizes
-            # that are already created.  If we are right at the quota limit,
-            # we want to probe to see if the last quota still applies or if we
-            # are allowed to create more nodes.
-            #
-            # For example, if the quota is actually based on core count, the
-            # quota might be 20 single-core machines or 10 dual-core machines.
-            # If we previously set node_quota to 10 dual core machines, but are
-            # now booting single core machines (actual quota 20), we want to
-            # allow the quota to expand so we don't get stuck at 10 machines
-            # forever.
-            if len(self.cloud_nodes) >= self.node_quota:
-                self.node_quota = len(self.cloud_nodes)+1
-                self._logger.warning("After successful boot setting node quota to %s", self.node_quota)
-
-        self.node_quota = min(self.node_quota, self.max_nodes)
-        del self.booting[setup_proxy.actor_ref.actor_urn]
-        del self.sizes_booting[setup_proxy.actor_ref.actor_urn]
-
-    @_check_poll_freshness
-    def stop_booting_node(self, size):
-        nodes_excess = self._nodes_excess(size)
-        if (nodes_excess < 1) or not self.booting:
-            return None
-        for key, node in self.booting.iteritems():
-            try:
-                if node and node.cloud_size.get().id == size.id and node.stop_if_no_cloud_node().get(2):
-                    del self.booting[key]
-                    del self.sizes_booting[key]
-                    if nodes_excess > 1:
-                        self._later.stop_booting_node(size)
-                    return
-            except pykka.Timeout:
-                pass
-
-    @_check_poll_freshness
-    def cancel_node_shutdown(self, size):
-        # Go through shutdown actors and see if there are any of the appropriate size that can be cancelled
-        for record in self.cloud_nodes.nodes.itervalues():
-            try:
-                if (record.shutdown_actor is not None and
-                        record.cloud_node.size.id == size.id and
-                        record.shutdown_actor.cancel_shutdown("Node size is in wishlist").get(2)):
-                    return True
-            except (pykka.ActorDeadError, pykka.Timeout):
-                pass
-        return False
-
-    def _begin_node_shutdown(self, node_actor, cancellable):
-        cloud_node_obj = node_actor.cloud_node.get()
-        cloud_node_id = cloud_node_obj.id
-        record = self.cloud_nodes[cloud_node_id]
-        if record.shutdown_actor is not None:
-            return None
-        shutdown = self._node_shutdown.start(
-            timer_actor=self._timer, cloud_client=self._new_cloud(),
-            arvados_client=self._new_arvados(),
-            node_monitor=node_actor.actor_ref, cancellable=cancellable)
-        record.shutdown_actor = shutdown.proxy()
-        shutdown.tell_proxy().subscribe(self._later.node_finished_shutdown)
-
-    @_check_poll_freshness
-    def node_can_shutdown(self, node_actor):
-        try:
-            if self._nodes_excess(node_actor.cloud_node.get().size) > 0:
-                self._begin_node_shutdown(node_actor, cancellable=True)
-            elif self.cloud_nodes.nodes.get(node_actor.cloud_node.get().id).arvados_node is None:
-                # Node is unpaired, which means it probably exceeded its booting
-                # grace period without a ping, so shut it down so we can boot a new
-                # node in its place.
-                self._begin_node_shutdown(node_actor, cancellable=False)
-            elif node_actor.in_state('down', 'fail').get():
-                # Node is down and unlikely to come back.
-                self._begin_node_shutdown(node_actor, cancellable=False)
-        except pykka.ActorDeadError as e:
-            # The monitor actor sends shutdown suggestions every time the
-            # node's state is updated, and these go into the daemon actor's
-            # message queue.  It's possible that the node has already been shut
-            # down (which shuts down the node monitor actor).  In that case,
-            # this message is stale and we'll get ActorDeadError when we try to
-            # access node_actor.  Log the error.
-            self._logger.debug("ActorDeadError in node_can_shutdown: %s", e)
-
-    def node_finished_shutdown(self, shutdown_actor):
-        try:
-            cloud_node, success = self._get_actor_attrs(
-                shutdown_actor, 'cloud_node', 'success')
-        except pykka.ActorDeadError:
-            return
-        cloud_node_id = cloud_node.id
-
-        try:
-            shutdown_actor.stop()
-        except pykka.ActorDeadError:
-            pass
-
-        try:
-            record = self.cloud_nodes[cloud_node_id]
-        except KeyError:
-            # Cloud node was already removed from the cloud node list,
-            # presumably while the destroy_node call was finishing its
-            # job.
-            return
-        record.shutdown_actor = None
-
-        if not success:
-            return
-
-        # Shutdown was successful, so stop the monitor actor, otherwise it
-        # will keep offering the node as a candidate for shutdown.
-        record.actor.stop()
-        record.actor = None
-
-        # If the node went from being booted to being shut down without ever
-        # appearing in the cloud node list, it will have the
-        # _nodemanager_recently_booted tag, so get rid of it so that the node
-        # can be forgotten completely.
-        if hasattr(record.cloud_node, "_nodemanager_recently_booted"):
-            del record.cloud_node._nodemanager_recently_booted
-
-    def shutdown(self):
-        self._logger.info("Shutting down after signal.")
-        self.poll_stale_after = -1  # Inhibit starting/stopping nodes
-
-        # Shut down pollers
-        self._server_wishlist_actor.stop()
-        self._arvados_nodes_actor.stop()
-        self._cloud_nodes_actor.stop()
-
-        # Clear cloud node list
-        self.update_cloud_nodes([])
-
-        # Stop setup actors unless they are in the middle of setup.
-        setup_stops = {key: node.stop_if_no_cloud_node()
-                       for key, node in self.booting.iteritems()}
-        self.booting = {key: self.booting[key]
-                        for key in setup_stops if not setup_stops[key].get()}
-        self._later.await_shutdown()
-
-    def await_shutdown(self):
-        if self.booting:
-            self._timer.schedule(time.time() + 1, self._later.await_shutdown)
-        else:
-            self.stop()
diff --git a/services/nodemanager/arvnodeman/jobqueue.py b/services/nodemanager/arvnodeman/jobqueue.py
deleted file mode 100644 (file)
index 7ca9c95..0000000
+++ /dev/null
@@ -1,255 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import logging
-import re
-import subprocess32 as subprocess
-
-import arvados.util
-
-from . import clientactor
-from .config import ARVADOS_ERRORS
-
-
-class ServerCalculator(object):
-    """Generate cloud server wishlists from an Arvados job queue.
-
-    Instantiate this class with a list of cloud node sizes you're willing to
-    use, plus keyword overrides from the configuration.  Then you can pass
-    job queues to servers_for_queue.  It will return a list of node sizes
-    that would best satisfy the jobs, choosing the cheapest size that
-    satisfies each job, and ignoring jobs that can't be satisfied.
-    """
-    class InvalidCloudSize(object):
-        """
-        Dummy CloudSizeWrapper-like class, to be used when a cloud node doesn't
-        have a recognizable arvados_node_size tag.
-        """
-        def __init__(self):
-            self.id = 'invalid'
-            self.name = 'invalid'
-            self.ram = 0
-            self.disk = 0
-            self.scratch = 0
-            self.cores = 0
-            self.bandwidth = 0
-            # price is multiplied by 1000 to get the node weight
-            # the maximum node weight is                  4294967280
-            # so use invalid node weight 4294967 * 1000 = 4294967000
-            self.price = 4294967
-            self.preemptible = False
-            self.extra = {}
-
-        def meets_constraints(self, **kwargs):
-            return False
-
-
-    class CloudSizeWrapper(object):
-        def __init__(self, real_size, node_mem_scaling, **kwargs):
-            self.real = real_size
-            for name in ['id', 'name', 'ram', 'disk', 'bandwidth', 'price',
-                         'extra']:
-                setattr(self, name, getattr(self.real, name))
-            self.cores = kwargs.pop('cores')
-            # libcloud disk sizes are in GB, Arvados/SLURM are in MB
-            # multiply by 1000 instead of 1024 to err on low side
-            if self.disk is None:
-                self.disk = 0
-            self.scratch = self.disk * 1000
-            self.ram = int(self.ram * node_mem_scaling)
-            self.preemptible = False
-            for name, override in kwargs.iteritems():
-                if name == 'instance_type': continue
-                if not hasattr(self, name):
-                    raise ValueError("unrecognized size field '%s'" % (name,))
-                setattr(self, name, override)
-
-            if self.price is None:
-                raise ValueError("Required field 'price' is None")
-
-        def meets_constraints(self, **kwargs):
-            for name, want_value in kwargs.iteritems():
-                have_value = getattr(self, name)
-                if (have_value != 0) and (have_value < want_value):
-                    return False
-            return True
-
-
-    def __init__(self, server_list, max_nodes=None, max_price=None,
-                 node_mem_scaling=0.95):
-        self.cloud_sizes = [self.CloudSizeWrapper(s, node_mem_scaling, **kws)
-                            for s, kws in server_list]
-        self.cloud_sizes.sort(key=lambda s: s.price)
-        self.max_nodes = max_nodes or float('inf')
-        self.max_price = max_price or float('inf')
-        self.logger = logging.getLogger('arvnodeman.jobqueue')
-
-        self.logger.info("Using cloud node sizes:")
-        for s in self.cloud_sizes:
-            self.logger.info(str(s.__dict__))
-
-    @staticmethod
-    def coerce_int(x, fallback):
-        try:
-            return int(x)
-        except (TypeError, ValueError):
-            return fallback
-
-    def cloud_size_for_constraints(self, constraints):
-        specified_size = constraints.get('instance_type')
-        want_value = lambda key: self.coerce_int(constraints.get(key), 0)
-        wants = {'cores': want_value('min_cores_per_node'),
-                 'ram': want_value('min_ram_mb_per_node'),
-                 'scratch': want_value('min_scratch_mb_per_node')}
-        # EC2 node sizes are identified by id. GCE sizes are identified by name.
-        for size in self.cloud_sizes:
-            if (size.meets_constraints(**wants) and
-                    (specified_size is None or
-                     size.id == specified_size or size.name == specified_size)):
-                return size
-        return None
-
-    def servers_for_queue(self, queue):
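-        # Map each job to the cheapest size satisfying its constraints; jobs
-        # that cannot be satisfied, or that would exceed max_nodes or
-        # max_price, are returned separately with a human-readable reason.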
-        servers = []
-        unsatisfiable_jobs = {}
-        for job in queue:
-            constraints = job['runtime_constraints']
-            want_count = max(1, self.coerce_int(constraints.get('min_nodes'), 1))
-            cloud_size = self.cloud_size_for_constraints(constraints)
-            if cloud_size is None:
-                unsatisfiable_jobs[job['uuid']] = (
-                    "Constraints cannot be satisfied by any node type")
-            elif (want_count > self.max_nodes):
-                unsatisfiable_jobs[job['uuid']] = (
-                    "Job's min_nodes constraint is greater than the configured "
-                    "max_nodes (%d)" % self.max_nodes)
-            elif (want_count*cloud_size.price <= self.max_price):
-                servers.extend([cloud_size] * want_count)
-            else:
-                unsatisfiable_jobs[job['uuid']] = (
-                    "Job's price (%d) is above system's max_price "
-                    "limit (%d)" % (want_count*cloud_size.price, self.max_price))
-        return (servers, unsatisfiable_jobs)
-
-    def cheapest_size(self):
-        return self.cloud_sizes[0]
-
-    def find_size(self, sizeid):
-        for s in self.cloud_sizes:
-            if s.id == sizeid:
-                return s
-        return self.InvalidCloudSize()
-
-
-class JobQueueMonitorActor(clientactor.RemotePollLoopActor):
-    """Actor to generate server wishlists from the job queue.
-
-    This actor regularly polls Arvados' job queue, and uses the provided
-    ServerCalculator to turn that into a list of requested node sizes.  That
-    list is sent to subscribers on every poll.
-    """
-
-    CLIENT_ERRORS = ARVADOS_ERRORS
-
-    def __init__(self, client, timer_actor, server_calc,
-                 jobs_queue, slurm_queue, *args, **kwargs):
-        super(JobQueueMonitorActor, self).__init__(
-            client, timer_actor, *args, **kwargs)
-        self.jobs_queue = jobs_queue
-        self.slurm_queue = slurm_queue
-        self._calculator = server_calc
-
-    @staticmethod
-    def coerce_to_mb(x):
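-        # Convert a SLURM size string such as "4000M" or "16G" to megabytes,
-        # using binary multipliers (G=2**10 MB, T=2**20 MB, P=2**30 MB); a
-        # bare number is assumed to be in megabytes already.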
-        v, u = x[:-1], x[-1]
-        if u in ("M", "m"):
-            return int(v)
-        elif u in ("G", "g"):
-            return float(v) * 2**10
-        elif u in ("T", "t"):
-            return float(v) * 2**20
-        elif u in ("P", "p"):
-            return float(v) * 2**30
-        else:
-            return int(x)
-
-    def _send_request(self):
-        queuelist = []
-        if self.slurm_queue:
-            # cpus, memory, temporary disk space, reason, job name, feature constraints, priority
-            squeue_out = subprocess.check_output(["squeue", "--state=PENDING", "--noheader", "--format=%c|%m|%d|%r|%j|%f|%Q"])
-            for out in squeue_out.splitlines():
-                try:
-                    cpu, ram, disk, reason, jobname, features, priority = out.split("|", 6)
-                except ValueError:
-                    self._logger.warning("ignored malformed line in squeue output: %r", out)
-                    continue
-                if '-dz642-' not in jobname:
-                    continue
-                if not re.search(r'BadConstraints|ReqNodeNotAvail|Resources|Priority', reason):
-                    continue
-
-                for feature in features.split(','):
-                    m = re.match(r'instancetype=(.*)', feature)
-                    if not m:
-                        continue
-                    instance_type = m.group(1)
-                    # Ignore cpu/ram/scratch requirements, bring up
-                    # the requested node type.
-                    queuelist.append({
-                        "uuid": jobname,
-                        "runtime_constraints": {
-                            "instance_type": instance_type,
-                        },
-                        "priority": int(priority)
-                    })
-                    break
-                else:
-                    # No instance type specified. Choose a node type
-                    # to suit cpu/ram/scratch requirements.
-                    queuelist.append({
-                        "uuid": jobname,
-                        "runtime_constraints": {
-                            "min_cores_per_node": cpu,
-                            "min_ram_mb_per_node": self.coerce_to_mb(ram),
-                            "min_scratch_mb_per_node": self.coerce_to_mb(disk)
-                        },
-                        "priority": int(priority)
-                    })
-            queuelist.sort(key=lambda x: x.get('priority', 1), reverse=True)
-
-        if self.jobs_queue:
-            queuelist.extend(self._client.jobs().queue().execute()['items'])
-
-        return queuelist
-
-    def _got_response(self, queue):
-        server_list, unsatisfiable_jobs = self._calculator.servers_for_queue(queue)
-        # Cancel any job/container with unsatisfiable requirements, emitting
-        # a log explaining why.
-        for job_uuid, reason in unsatisfiable_jobs.iteritems():
-            try:
-                self._client.logs().create(body={
-                    'object_uuid': job_uuid,
-                    'event_type': 'stderr',
-                    'properties': {'text': reason},
-                }).execute()
-                # Cancel the job depending on its type
-                if arvados.util.container_uuid_pattern.match(job_uuid):
-                    subprocess.check_call(['scancel', '--name='+job_uuid])
-                elif arvados.util.job_uuid_pattern.match(job_uuid):
-                    self._client.jobs().cancel(uuid=job_uuid).execute()
-                else:
-                    raise Exception('Unknown job type')
-                self._logger.debug("Cancelled unsatisfiable job '%s'", job_uuid)
-            except Exception as error:
-                self._logger.error("Trying to cancel job '%s': %s",
-                                   job_uuid,
-                                   error)
-        self._logger.debug("Calculated wishlist: %s",
-                           ', '.join(s.id for s in server_list) or "(empty)")
-        return super(JobQueueMonitorActor, self)._got_response(server_list)
diff --git a/services/nodemanager/arvnodeman/launcher.py b/services/nodemanager/arvnodeman/launcher.py
deleted file mode 100644 (file)
index 34ea9ad..0000000
+++ /dev/null
@@ -1,171 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import argparse
-import logging
-import signal
-import sys
-import time
-
-import daemon
-import pykka
-import libcloud
-
-from . import config as nmconfig
-from . import status
-from .baseactor import WatchdogActor
-from .daemon import NodeManagerDaemonActor
-from .jobqueue import JobQueueMonitorActor, ServerCalculator
-from .nodelist import ArvadosNodeListMonitorActor, CloudNodeListMonitorActor
-from .timedcallback import TimedCallBackActor
-from ._version import __version__
-
-node_daemon = None
-watchdog = None
-
-def abort(msg, code=1):
-    print("arvados-node-manager: " + msg)
-    sys.exit(code)
-
-def parse_cli(args):
-    parser = argparse.ArgumentParser(
-        prog='arvados-node-manager',
-        description="Dynamically allocate Arvados cloud compute nodes")
-    parser.add_argument(
-        '--version', action='version',
-        version="%s %s" % (sys.argv[0], __version__),
-        help='Print version and exit.')
-    parser.add_argument(
-        '--foreground', action='store_true', default=False,
-        help="Run in the foreground.  Don't daemonize.")
-    parser.add_argument(
-        '--config', help="Path to configuration file")
-    return parser.parse_args(args)
-
-def load_config(path):
-    if not path:
-        abort("No --config file specified", 2)
-    config = nmconfig.NodeManagerConfig()
-    try:
-        with open(path) as config_file:
-            config.readfp(config_file)
-    except (IOError, OSError) as error:
-        abort("Error reading configuration file {}: {}".format(path, error))
-    return config
-
-def setup_logging(path, level, **sublevels):
-    handler = logging.FileHandler(path)
-    handler.setFormatter(logging.Formatter(
-            '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
-            '%Y-%m-%d %H:%M:%S'))
-    root_logger = logging.getLogger()
-    root_logger.addHandler(handler)
-    root_logger.setLevel(level)
-    for logger_name, sublevel in sublevels.iteritems():
-        sublogger = logging.getLogger(logger_name)
-        sublogger.setLevel(sublevel)
-    return root_logger
-
-def build_server_calculator(config):
-    cloud_size_list = config.node_sizes()
-    if not cloud_size_list:
-        abort("No valid node sizes configured")
-    return ServerCalculator(cloud_size_list,
-                            config.getint('Daemon', 'max_nodes'),
-                            config.getfloat('Daemon', 'max_total_price'),
-                            config.getfloat('Daemon', 'node_mem_scaling'))
-
-def launch_pollers(config, server_calculator):
-    poll_time = config.getfloat('Daemon', 'poll_time')
-    max_poll_time = config.getint('Daemon', 'max_poll_time')
-
-    cloudlist_poll_time = config.getfloat('Daemon', 'cloudlist_poll_time') or poll_time
-    nodelist_poll_time = config.getfloat('Daemon', 'nodelist_poll_time') or poll_time
-    wishlist_poll_time = config.getfloat('Daemon', 'wishlist_poll_time') or poll_time
-
-    timer = TimedCallBackActor.start(poll_time / 10.0).tell_proxy()
-    cloud_node_poller = CloudNodeListMonitorActor.start(
-        config.new_cloud_client(), timer, server_calculator, cloudlist_poll_time, max_poll_time).tell_proxy()
-    arvados_node_poller = ArvadosNodeListMonitorActor.start(
-        config.new_arvados_client(), timer, nodelist_poll_time, max_poll_time).tell_proxy()
-    job_queue_poller = JobQueueMonitorActor.start(
-        config.new_arvados_client(), timer, server_calculator,
-        config.getboolean('Arvados', 'jobs_queue'),
-        config.getboolean('Arvados', 'slurm_queue'),
-        wishlist_poll_time, max_poll_time
-    ).tell_proxy()
-    return timer, cloud_node_poller, arvados_node_poller, job_queue_poller
-
-_caught_signals = {}
-def shutdown_signal(signal_code, frame):
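-    # Escalate on repeated signals: the first asks the daemon to shut down
-    # gracefully, the second stops all actors immediately, and any further
-    # signal exits the process outright.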
-    current_count = _caught_signals.get(signal_code, 0)
-    _caught_signals[signal_code] = current_count + 1
-    if node_daemon is None:
-        pykka.ActorRegistry.stop_all()
-        sys.exit(-signal_code)
-    elif current_count == 0:
-        watchdog.stop()
-        node_daemon.shutdown()
-    elif current_count == 1:
-        pykka.ActorRegistry.stop_all()
-    else:
-        sys.exit(-signal_code)
-
-def main(args=None):
-    global node_daemon, watchdog
-    args = parse_cli(args)
-    config = load_config(args.config)
-
-    if not args.foreground:
-        daemon.DaemonContext().open()
-    for sigcode in [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM]:
-        signal.signal(sigcode, shutdown_signal)
-
-    status.Server(config).start()
-
-    try:
-        root_logger = setup_logging(config.get('Logging', 'file'), **config.log_levels())
-        root_logger.info("%s %s started, libcloud %s", sys.argv[0], __version__, libcloud.__version__)
-        node_setup, node_shutdown, node_update, node_monitor = \
-            config.dispatch_classes()
-        server_calculator = build_server_calculator(config)
-        timer, cloud_node_poller, arvados_node_poller, job_queue_poller = \
-            launch_pollers(config, server_calculator)
-        cloud_node_updater = node_update.start(config.new_cloud_client, timer).tell_proxy()
-        node_daemon = NodeManagerDaemonActor.start(
-            job_queue_poller, arvados_node_poller, cloud_node_poller,
-            cloud_node_updater, timer,
-            config.new_arvados_client, config.new_cloud_client,
-            config.shutdown_windows(),
-            server_calculator,
-            config.getint('Daemon', 'min_nodes'),
-            config.getint('Daemon', 'max_nodes'),
-            config.getint('Daemon', 'poll_stale_after'),
-            config.getint('Daemon', 'boot_fail_after'),
-            config.getint('Daemon', 'node_stale_after'),
-            node_setup, node_shutdown, node_monitor,
-            max_total_price=config.getfloat('Daemon', 'max_total_price'),
-            consecutive_idle_count=config.getint('Daemon', 'consecutive_idle_count'),).tell_proxy()
-
-        watchdog = WatchdogActor.start(config.getint('Daemon', 'watchdog'),
-                            cloud_node_poller.actor_ref,
-                            arvados_node_poller.actor_ref,
-                            job_queue_poller.actor_ref,
-                            node_daemon.actor_ref)
-
-        signal.pause()
-        daemon_stopped = node_daemon.actor_ref.actor_stopped.is_set
-        while not daemon_stopped():
-            time.sleep(1)
-    except Exception:
-        logging.exception("Uncaught exception during setup")
-    finally:
-        pykka.ActorRegistry.stop_all()
-
-
-if __name__ == '__main__':
-    main()
diff --git a/services/nodemanager/arvnodeman/nodelist.py b/services/nodemanager/arvnodeman/nodelist.py
deleted file mode 100644 (file)
index 0abb3b3..0000000
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import subprocess32 as subprocess
-
-from . import clientactor
-from . import config
-
-import arvados.util
-
-class ArvadosNodeListMonitorActor(clientactor.RemotePollLoopActor):
-    """Actor to poll the Arvados node list.
-
-    This actor regularly polls the list of Arvados node records,
-    augments it with the latest SLURM node info (`sinfo`), and sends
-    it to subscribers.
-    """
-
-    def is_common_error(self, exception):
-        return isinstance(exception, config.ARVADOS_ERRORS)
-
-    def _item_key(self, node):
-        return node['uuid']
-
-    def _send_request(self):
-        nodelist = arvados.util.list_all(self._client.nodes().list)
-
-        # node hostname, state, features
-        sinfo_out = subprocess.check_output(["sinfo", "--noheader", "--format=%n|%t|%f"])
-        nodestates = {}
-        nodefeatures = {}
-        for out in sinfo_out.splitlines():
-            try:
-                nodename, state, features = out.split("|", 3)
-            except ValueError:
-                continue
-            if state in ('alloc', 'alloc*',
-                         'comp',  'comp*',
-                         'mix',   'mix*',
-                         'drng',  'drng*'):
-                nodestates[nodename] = 'busy'
-            elif state in ('idle', 'fail'):
-                nodestates[nodename] = state
-            else:
-                nodestates[nodename] = 'down'
-            if features != "(null)":
-                nodefeatures[nodename] = features
-
-        for n in nodelist:
-            if n["slot_number"] and n["hostname"] and n["hostname"] in nodestates:
-                n["crunch_worker_state"] = nodestates[n["hostname"]]
-            else:
-                n["crunch_worker_state"] = 'down'
-            n["slurm_node_features"] = nodefeatures.get(n["hostname"], "")
-
-        return nodelist
-
-class CloudNodeListMonitorActor(clientactor.RemotePollLoopActor):
-    """Actor to poll the cloud node list.
-
-    This actor regularly polls the cloud to get a list of running compute
-    nodes, and sends it to subscribers.
-    """
-
-    def __init__(self, client, timer_actor, server_calc, *args, **kwargs):
-        super(CloudNodeListMonitorActor, self).__init__(
-            client, timer_actor, *args, **kwargs)
-        self._calculator = server_calc
-
-    def is_common_error(self, exception):
-        return isinstance(exception, config.CLOUD_ERRORS)
-
-    def _item_key(self, node):
-        return node.id
-
-    def _send_request(self):
-        nodes = self._client.list_nodes()
-        for n in nodes:
-            # Replace the libcloud NodeSize object with compatible
-            # CloudSizeWrapper object which merges the size info reported from
-            # the cloud with size information from the configuration file.
-            n.size = self._calculator.find_size(n.extra['arvados_node_size'])
-        return nodes
diff --git a/services/nodemanager/arvnodeman/status.py b/services/nodemanager/arvnodeman/status.py
deleted file mode 100644 (file)
index 1e18996..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-from future import standard_library
-# Enable Python 3 style stdlib imports (http.server, socketserver) on Python 2.
-standard_library.install_aliases()
-
-import http.server
-import time
-import json
-import logging
-import socketserver
-import threading
-
-from ._version import __version__
-
-_logger = logging.getLogger('status.Handler')
-
-
-class Server(socketserver.ThreadingMixIn, http.server.HTTPServer, object):
-    def __init__(self, config):
-        port = config.getint('Manage', 'port')
-        self.enabled = port >= 0
-        if not self.enabled:
-            _logger.warning("Management server disabled. "+
-                            "Use [Manage] config section to enable.")
-            return
-        self._config = config
-        self._tracker = tracker
-        self._tracker.update({'config_max_nodes': config.getint('Daemon', 'max_nodes')})
-        super(Server, self).__init__(
-            (config.get('Manage', 'address'), port), Handler)
-        self._thread = threading.Thread(target=self.serve_forever)
-        self._thread.daemon = True
-
-    def start(self):
-        if self.enabled:
-            self._thread.start()
-
-
-class Handler(http.server.BaseHTTPRequestHandler, object):
-    def do_GET(self):
-        if self.path == '/status.json':
-            self.send_response(200)
-            self.send_header('Content-type', 'application/json')
-            self.end_headers()
-            self.wfile.write(tracker.get_json())
-        elif self.path == '/_health/ping':
-            code, msg = self.check_auth()
-
-            if code != 200:
-                self.send_response(code)
-                self.end_headers()
-                self.wfile.write(msg)
-            else:
-                self.send_response(200)
-                self.send_header('Content-type', 'application/json')
-                self.end_headers()
-                self.wfile.write(json.dumps({"health":"OK"}))
-        else:
-            self.send_response(404)
-
-    def log_message(self, fmt, *args, **kwargs):
-        _logger.info(fmt, *args, **kwargs)
-
-    def check_auth(self):
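-        # Report 404 if the management token is unset (endpoint disabled),
-        # 401 when no Authorization header was sent, 403 on a token
-        # mismatch, and 200 when the bearer token matches.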
-        mgmt_token = self.server._config.get('Manage', 'ManagementToken')
-        auth_header = self.headers.get('Authorization', None)
-
-        if mgmt_token == '':
-            return 404, "disabled"
-        elif auth_header is None:
-            return 401, "authorization required"
-        elif auth_header != 'Bearer '+mgmt_token:
-            return 403, "authorization error"
-        return 200, ""
-
-class Tracker(object):
-    def __init__(self):
-        self._mtx = threading.Lock()
-        self._latest = {
-            'list_nodes_errors': 0,
-            'create_node_errors': 0,
-            'destroy_node_errors': 0,
-            'boot_failures': 0,
-            'actor_exceptions': 0
-        }
-        self._version = {'Version' : __version__}
-        self._idle_nodes = {}
-
-    def get_json(self):
-        with self._mtx:
-            times = {'idle_times' : {}}
-            now = time.time()
-            for node, ts in self._idle_nodes.items():
-                times['idle_times'][node] = int(now - ts)
-            return json.dumps(
-                dict(dict(self._latest, **self._version), **times))
-
-    def keys(self):
-        with self._mtx:
-            return self._latest.keys()
-
-    def get(self, key):
-        with self._mtx:
-            return self._latest.get(key)
-
-    def update(self, updates):
-        with self._mtx:
-            self._latest.update(updates)
-
-    def counter_add(self, counter, value=1):
-        with self._mtx:
-            self._latest.setdefault(counter, 0)
-            self._latest[counter] += value
-
-    def idle_in(self, nodename):
-        with self._mtx:
-            if self._idle_nodes.get(nodename):
-                return
-            self._idle_nodes[nodename] = time.time()
-
-    def idle_out(self, nodename):
-        with self._mtx:
-            try:
-                del self._idle_nodes[nodename]
-            except KeyError:
-                pass
-
-tracker = Tracker()
diff --git a/services/nodemanager/arvnodeman/test/fake_driver.py b/services/nodemanager/arvnodeman/test/fake_driver.py
deleted file mode 100644 (file)
index 2a592f9..0000000
+++ /dev/null
@@ -1,226 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-import re
-import urllib
-import ssl
-import time
-
-from arvnodeman.computenode import ARVADOS_TIMEFMT
-
-from libcloud.compute.base import NodeSize, Node, NodeDriver, NodeState, NodeImage
-from libcloud.compute.drivers.gce import GCEDiskType
-from libcloud.common.exceptions import BaseHTTPError, RateLimitReachedError
-
-all_nodes = []
-create_calls = 0
-quota = 2
-
-class FakeDriver(NodeDriver):
-    def __init__(self, *args, **kwargs):
-        self.name = "FakeDriver"
-
-    def list_sizes(self, **kwargs):
-        return [NodeSize("Standard_D3", "Standard_D3", 3500, 200, 0, 0, self),
-                NodeSize("Standard_D4", "Standard_D4", 7000, 400, 0, 0, self)]
-
-    def list_nodes(self, **kwargs):
-        return all_nodes
-
-    def create_node(self, name=None,
-                    size=None,
-                    image=None,
-                    auth=None,
-                    ex_storage_account=None,
-                    ex_customdata=None,
-                    ex_resource_group=None,
-                    ex_user_name=None,
-                    ex_tags=None,
-                    ex_metadata=None,
-                    ex_network=None,
-                    ex_userdata=None):
-        global all_nodes, create_calls
-        create_calls += 1
-        nodeid = "node%i" % create_calls
-        if ex_tags is None:
-            ex_tags = {}
-        ex_tags.update({'arvados_node_size': size.id})
-        n = Node(nodeid, nodeid, NodeState.RUNNING, [], [], self, size=size, extra={"tags": ex_tags})
-        all_nodes.append(n)
-        if ex_customdata:
-            ping_url = re.search(r"echo '(.*)' > /var/tmp/arv-node-data/arv-ping-url", ex_customdata).groups(1)[0]
-        if ex_userdata:
-            ping_url = ex_userdata
-        elif ex_metadata:
-            ping_url = ex_metadata["arv-ping-url"]
-        ping_url += "&instance_id=" + nodeid
-        ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
-        ctx.verify_mode = ssl.CERT_NONE
-        f = urllib.urlopen(ping_url, "", context=ctx)
-        f.close()
-        return n
-
-    def destroy_node(self, cloud_node):
-        global all_nodes
-        all_nodes = [n for n in all_nodes if n.id != cloud_node.id]
-        return True
-
-    def get_image(self, img):
-        pass
-
-    def ex_create_tags(self, cloud_node, tags):
-        pass
-
-class QuotaDriver(FakeDriver):
-    def create_node(self, name=None,
-                    size=None,
-                    image=None,
-                    auth=None,
-                    ex_storage_account=None,
-                    ex_customdata=None,
-                    ex_resource_group=None,
-                    ex_user_name=None,
-                    ex_tags=None,
-                    ex_network=None):
-        global all_nodes, create_calls, quota
-        if len(all_nodes) >= quota:
-            raise BaseHTTPError(503, "Quota exceeded")
-        else:
-            return super(QuotaDriver, self).create_node(name=name,
-                    size=size,
-                    image=image,
-                    auth=auth,
-                    ex_storage_account=ex_storage_account,
-                    ex_customdata=ex_customdata,
-                    ex_resource_group=ex_resource_group,
-                    ex_user_name=ex_user_name,
-                    ex_tags=ex_tags,
-                    ex_network=ex_network)
-
-    def destroy_node(self, cloud_node):
-        global all_nodes, quota
-        all_nodes = [n for n in all_nodes if n.id != cloud_node.id]
-        if len(all_nodes) == 0:
-            quota = 4
-        return True
-
-class FailingDriver(FakeDriver):
-    def create_node(self, name=None,
-                    size=None,
-                    image=None,
-                    auth=None,
-                    ex_storage_account=None,
-                    ex_customdata=None,
-                    ex_resource_group=None,
-                    ex_user_name=None,
-                    ex_tags=None,
-                    ex_network=None):
-        raise Exception("nope")
-
-class RetryDriver(FakeDriver):
-    def create_node(self, name=None,
-                    size=None,
-                    image=None,
-                    auth=None,
-                    ex_storage_account=None,
-                    ex_customdata=None,
-                    ex_resource_group=None,
-                    ex_user_name=None,
-                    ex_tags=None,
-                    ex_network=None):
-        global create_calls
-        create_calls += 1
-        if create_calls < 2:
-            raise RateLimitReachedError(429, "Rate limit exceeded",
-                                        headers={'retry-after': '2'})
-        elif create_calls < 3:
-            raise BaseHTTPError(429, "Rate limit exceeded",
-                                {'retry-after': '1'})
-        else:
-            return super(RetryDriver, self).create_node(name=name,
-                    size=size,
-                    image=image,
-                    auth=auth,
-                    ex_storage_account=ex_storage_account,
-                    ex_customdata=ex_customdata,
-                    ex_resource_group=ex_resource_group,
-                    ex_user_name=ex_user_name,
-                    ex_tags=ex_tags,
-                    ex_network=ex_network)
-
-class FakeAwsDriver(FakeDriver):
-
-    def create_node(self, name=None,
-                    size=None,
-                    image=None,
-                    auth=None,
-                    ex_userdata=None,
-                    ex_metadata=None,
-                    ex_blockdevicemappings=None):
-        n = super(FakeAwsDriver, self).create_node(name=name,
-                                                      size=size,
-                                                      image=image,
-                                                      auth=auth,
-                                                      ex_metadata=ex_metadata,
-                                                      ex_userdata=ex_userdata)
-        n.extra = {
-            "launch_time": time.strftime(ARVADOS_TIMEFMT, time.gmtime())[:-1],
-            "tags" : {
-                "arvados_node_size": size.id
-            }
-        }
-        return n
-
-    def list_sizes(self, **kwargs):
-        return [NodeSize("m3.xlarge", "Extra Large Instance", 3500, 80, 0, 0, self),
-                NodeSize("m4.xlarge", "Extra Large Instance", 3500, 0, 0, 0, self),
-                NodeSize("m4.2xlarge", "Double Extra Large Instance", 7000, 0, 0, 0, self)]
-
-
-class FakeGceDriver(FakeDriver):
-
-    def create_node(self, name=None,
-                    size=None,
-                    image=None,
-                    auth=None,
-                    external_ip=None,
-                    ex_metadata=None,
-                    ex_tags=None,
-                    ex_disks_gce_struct=None):
-        n = super(FakeGceDriver, self).create_node(name=name,
-                                                   size=size,
-                                                   image=image,
-                                                   auth=auth,
-                                                   ex_metadata=ex_metadata)
-        n.extra = {
-            "metadata": {
-                "items": [{"key": k, "value": v} for k,v in ex_metadata.iteritems()],
-                "arvados_node_size": size.id
-            },
-            "zone": "fake"
-        }
-        return n
-
-    def list_images(self, ex_project=None):
-        return [NodeImage("fake_image_id", "fake_image_id", self)]
-
-    def list_sizes(self, **kwargs):
-        return [NodeSize("n1-standard-1", "Standard", 3750, None, 0, 0, self),
-                NodeSize("n1-standard-2", "Double standard", 7500, None, 0, 0, self)]
-
-    def ex_list_disktypes(self, zone=None):
-        return [GCEDiskType("pd-standard", "pd-standard", zone, self,
-                            extra={"selfLink": "pd-standard"}),
-                GCEDiskType("local-ssd", "local-ssd", zone, self,
-                            extra={"selfLink": "local-ssd"})]
-
-    def ex_get_node(self, name, zone=None):
-        global all_nodes
-        for n in all_nodes:
-            if n.id == name:
-                return n
-        return None
-
-    def ex_set_node_metadata(self, n, items):
-        n.extra["metadata"]["items"] = items
diff --git a/services/nodemanager/arvnodeman/timedcallback.py b/services/nodemanager/arvnodeman/timedcallback.py
deleted file mode 100644 (file)
index e7e3f25..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import heapq
-import time
-
-import pykka
-
-from .config import actor_class
-
-class TimedCallBackActor(actor_class):
-    """Send messages to other actors on a schedule.
-
-    Other actors can call the schedule() method to schedule delivery of a
-    message at a later time.  This actor runs the necessary event loop for
-    delivery.
-    """
-    def __init__(self, max_sleep=1, timefunc=None):
-        super(TimedCallBackActor, self).__init__()
-        self._proxy = self.actor_ref.tell_proxy()
-        self.messages = []
-        self.max_sleep = max_sleep
-        if timefunc is None:
-            self._timefunc = time.time
-        else:
-            self._timefunc = timefunc
-
-    def schedule(self, delivery_time, receiver, *args, **kwargs):
-        if not self.messages:
-            self._proxy.deliver()
-        heapq.heappush(self.messages, (delivery_time, receiver, args, kwargs))
-
-    def deliver(self):
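-        # Messages live in a min-heap ordered by delivery time.  Deliver the
-        # earliest message if it is due; otherwise sleep briefly (at most
-        # max_sleep) and re-send deliver() to keep polling the heap.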
-        if not self.messages:
-            return
-        til_next = self.messages[0][0] - self._timefunc()
-        if til_next <= 0:
-            t, receiver, args, kwargs = heapq.heappop(self.messages)
-            try:
-                receiver(*args, **kwargs)
-            except pykka.ActorDeadError:
-                pass
-        else:
-            time.sleep(min(til_next, self.max_sleep))
-        self._proxy.deliver()
diff --git a/services/nodemanager/bin/arvados-node-manager b/services/nodemanager/bin/arvados-node-manager
deleted file mode 100755 (executable)
index 72e0831..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-from arvnodeman.launcher import main
-main()
diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
deleted file mode 100644 (file)
index 8ba6801..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# Azure configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes.
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on rate of spending (in $/hr), will not boot additional nodes
-# if total price of already running nodes meets or exceeds this threshold.
-# default 0 means no limit.
-max_total_price = 0
-
-# Poll Azure nodes and Arvados for new information every N seconds.
-poll_time = 60
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 300
-
-# If Node Manager can't successfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 600
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 1800
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Number of consecutive times a node must report as "idle" before it
-# will be considered eligible for shutdown.  Node status is checked
-# each poll period, and a node can go idle at any point during a poll
-# period (meaning a node that has been idle for only 1 second could be
-# reported as idle).  With a 60 second poll period, three consecutive
-# status updates of "idle" suggest the node has been idle at least
-# 121 seconds.
-consecutive_idle_count = 3
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-
-[Logging]
-# Log file path
-file = /var/log/arvados/node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = INFO
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = zyxwv.arvadosapi.com
-token = ARVADOS_TOKEN
-timeout = 15
-jobs_queue = yes   # Get work request from Arvados jobs queue (jobs API)
-slurm_queue = yes  # Get work request from squeue (containers API)
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = no
-
-[Cloud]
-provider = azure
-
-# Shutdown windows define periods of time when a node may and may not be shut
-# down.  These are windows in full minutes, separated by commas.  Counting from
-# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
-# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
-# and so on.  For example, "20, 999999" means the node may shut down between
-# the 20th and 999999th minutes of uptime.
-# Azure bills by the minute, so it makes sense to aggressively shut down idle
-# nodes.  Specify at least two windows.  You can add as many as you need beyond
-# that.
-shutdown_windows = 20, 999999
-
-[Cloud Credentials]
-# Use "azure account list" with the azure CLI to get these values.
-tenant_id = 00000000-0000-0000-0000-000000000000
-subscription_id = 00000000-0000-0000-0000-000000000000
-
-# The following directions are based on
-# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
-#
-# azure config mode arm
-# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YouApplicationUri>" --password <Your_Password>
-# azure ad sp create "<Application_Id>"
-# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/{subscriptionId}/
-#
-# Use <Application_Id> for "key" and the <Your_Password> for "secret"
-#
-key = 00000000-0000-0000-0000-000000000000
-secret = PASSWORD
-timeout = 60
-region = East US
-
-[Cloud List]
-# The resource group in which the compute node virtual machines will be created
-# and listed.
-ex_resource_group = ArvadosResourceGroup
-
-[Cloud Create]
-# The image id, in the form "Publisher:Offer:SKU:Version"
-image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050
-
-# Path to a local ssh key file that will be used to provision new nodes.
-ssh_key = /home/arvadosuser/.ssh/id_rsa.pub
-
-# The account name for the admin user that will be provisioned on new nodes.
-ex_user_name = arvadosuser
-
-# The Azure storage account that will be used to store the node OS disk images.
-ex_storage_account = arvadosstorage
-
-# The virtual network the VMs will be associated with.
-ex_network = ArvadosNetwork
-
-# Optional subnet of the virtual network.
-#ex_subnet = default
-
-# Node tags
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
-
-# the API server to ping
-ping_host = hostname:port
-
-# You can define any number of Size sections to list Azure sizes you're willing
-# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
-# in the queue.  You must also provide the price per hour, as the Azure
-# compute driver currently does not report prices.
-#
-# See https://azure.microsoft.com/en-us/pricing/details/virtual-machines/
-# for a list of known machine types that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.  You can also override Microsoft's provided
-# data fields by setting them here.
-
-[Size Standard_D3]
-cores = 4
-price = 0.56
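-# Optional and illustrative only (not in the original file): per-size scratch
-# space in GB for Crunch jobs.
-#scratch = 200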
-
-[Size Standard_D4]
-cores = 8
-price = 1.12
diff --git a/services/nodemanager/doc/ec2.example.cfg b/services/nodemanager/doc/ec2.example.cfg
deleted file mode 100644 (file)
index 3bc905b..0000000
+++ /dev/null
@@ -1,205 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# EC2 configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes.
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on the rate of spending (in $/hr).  Node Manager will not boot
-# additional nodes if the total price of the already-running nodes meets or
-# exceeds this threshold.  The default of 0 means no limit.
-max_total_price = 0
-
-# Poll EC2 nodes and Arvados for new information every N seconds.
-poll_time = 60
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 300
-
-# If Node Manager can't successfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 600
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 1800
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Number of consecutive times a node must report as "idle" before it
-# will be considered eligible for shutdown.  Node status is checked
-# each poll period, and a node can go idle at any point during a poll
-# period (meaning a node that has only been idle for 1 second could be
-# reported as idle).  With a 60-second poll period, three consecutive
-# status updates of "idle" suggest the node has been idle for at least
-# 121 seconds.
-consecutive_idle_count = 3
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-file = /var/log/arvados/node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = INFO
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = zyxwv.arvadosapi.com
-token = ARVADOS_TOKEN
-timeout = 15
-jobs_queue = yes   # Get work requests from the Arvados jobs queue (jobs API)
-slurm_queue = yes  # Get work requests from squeue (containers API)
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = no
-
-[Cloud]
-provider = ec2
-
-# It's usually most cost-effective to shut down compute nodes during narrow
-# windows of time.  For example, EC2 bills each node by the hour, so the best
-# time to shut down a node is right before a new hour of uptime starts.
-# Shutdown windows define these periods of time.  These are windows in
-# full minutes, separated by commas.  Counting from the time the node is
-# booted, the node WILL NOT shut down for N1 minutes; then it MAY shut down
-# for N2 minutes; then it WILL NOT shut down for N3 minutes; and so on.
-# For example, "54, 5, 1" means the node may shut down from the 54th to the
-# 59th minute of each hour of uptime.
-# Specify at least two windows.  You can add as many as you need beyond that.
-shutdown_windows = 54, 5, 1
-
-[Cloud Credentials]
-key = KEY
-secret = SECRET_KEY
-region = us-east-1
-timeout = 60
-
-[Cloud List]
-# This section defines filters that find compute nodes.
-# Tags that you specify here will automatically be added to nodes you create.
-# Replace colons in Amazon filters with underscores
-# (e.g., write "tag:mytag" as "tag_mytag").
-instance-state-name = running
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
-
-[Cloud Create]
-# New compute nodes will send pings to Arvados at this host.
-# You may specify a port, and use brackets to disambiguate IPv6 addresses.
-ping_host = hostname:port
-
-# Give the name of an SSH key on AWS...
-ex_keyname = string
-
-# ... or a file path for an SSH key that can log in to the compute node.
-# (One or the other, not both.)
-# ssh_key = path
-
-# The EC2 IDs of the image and subnet that compute nodes should use.
-image_id = idstring
-subnet_id = idstring
-
-# Comma-separated EC2 IDs for the security group(s) assigned to each
-# compute node.
-security_groups = idstring1, idstring2
-
-# Apply an Instance Profile ARN to the newly created compute nodes.
-# For more info, see:
-# https://aws.amazon.com/premiumsupport/knowledge-center/iam-policy-restrict-vpc/
-# ex_iamprofile = arn:aws:iam::ACCOUNTNUMBER:instance-profile/ROLENAME
-
-
-# You can define any number of Size sections to list EC2 sizes you're
-# willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue.
-#
-# Each size section MUST define the number of cores available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in MB) for Crunch jobs.  You can also override Amazon's provided
-# data fields (such as price per hour) by setting them here.
-#
-# Additionally, you can ask for a preemptible instance (AWS's spot instance)
-# by adding the appropriate boolean configuration flag. If you want to have
-# both spot & reserved versions of the same size, you can do so by renaming
-# the Size section and specifying the instance type inside it.
-
-# 100 GB scratch space
-[Size m4.large]
-cores = 2
-price = 0.126
-scratch = 100000
-
-# 10 GB scratch space
-[Size m4.large.spot]
-instance_type = m4.large
-preemptible = true
-cores = 2
-price = 0.126
-scratch = 10000
-
-# 200 GB scratch space
-[Size m4.xlarge]
-cores = 4
-price = 0.252
-scratch = 200000
diff --git a/services/nodemanager/doc/gce.example.cfg b/services/nodemanager/doc/gce.example.cfg
deleted file mode 100644 (file)
index acd3fd1..0000000
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# Google Compute Engine configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes.
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Poll compute nodes and Arvados for new information every N seconds.
-poll_time = 60
-
-# Upper limit on the rate of spending (in $/hr).  Node Manager will not boot
-# additional nodes if the total price of the already-running nodes meets or
-# exceeds this threshold.  The default of 0 means no limit.
-max_total_price = 0
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 300
-
-# If Node Manager can't successfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 600
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Number of consecutive times a node must report as "idle" before it
-# will be considered eligible for shutdown.  Node status is checked
-# each poll period, and node can go idle at any point during a poll
-# period (meaning a node could be reported as idle that has only been
-# idle for 1 second).  With a 60 second poll period, three consecutive
-# status updates of "idle" suggests the node has been idle at least
-# 121 seconds.
-consecutive_idle_count = 3
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-file = /var/log/arvados/node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = INFO
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = zyxwv.arvadosapi.com
-token = ARVADOS_TOKEN
-timeout = 15
-jobs_queue = yes   # Get work requests from the Arvados jobs queue (jobs API)
-slurm_queue = yes  # Get work requests from squeue (containers API)
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = no
-
-[Cloud]
-provider = gce
-
-# Shutdown windows define periods of time when a node may and may not
-# be shut down.  These are windows in full minutes, separated by
-# commas.  Counting from the time the node is booted, the node WILL
-# NOT shut down for N1 minutes; then it MAY shut down for N2 minutes;
-# then it WILL NOT shut down for N3 minutes; and so on.  For example,
-# "54, 5, 1" means the node may shut down from the 54th to the 59th
-# minute of each hour of uptime.
-# GCE bills by the minute, and does not provide information about when
-# a node booted.  Node Manager will store this information in metadata
-# when it boots a node; if that information is not available, it will
-# assume the node booted at the epoch.  These shutdown settings are
-# very aggressive.  You may want to adjust this if you want more
-# continuity of service from a single node.
-shutdown_windows = 20, 999999
-
-[Cloud Credentials]
-user_id = client_email_address@developer.gserviceaccount.com
-key = path_to_certificate.pem
-project = project-id-from-google-cloud-dashboard
-timeout = 60
-
-# Valid location (zone) names: https://cloud.google.com/compute/docs/zones
-datacenter = us-central1-a
-
-# Optional settings. For full documentation see
-# http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#libcloud.compute.drivers.gce.GCENodeDriver
-#
-# auth_type = SA               # SA, IA or GCE
-# scopes = https://www.googleapis.com/auth/compute
-# credential_file =
-
-[Cloud List]
-# A comma-separated list of tags that must be applied to a node for it to
-# be considered a compute node.
-# The driver will automatically apply these tags to nodes it creates.
-tags = zyxwv, compute
-
-[Cloud Create]
-# New compute nodes will send pings to Arvados at this host.
-# You may specify a port, and use brackets to disambiguate IPv6 addresses.
-ping_host = hostname:port
-
-# A file path for an SSH key that can log in to the compute node.
-# ssh_key = path
-
-# The GCE image name and network name to use when creating new nodes.
-image = debian-7
-# network = your_network_name
-
-# JSON string of service account authorizations for this cluster.
-# See http://libcloud.readthedocs.org/en/latest/compute/drivers/gce.html#specifying-service-account-scopes
-# service_accounts = [{'email':'account@example.com', 'scopes':['storage-ro']}]
-
-
-# You can define any number of Size sections to list node sizes you're
-# willing to use.  The Node Manager should boot the cheapest size(s) that
-# can run jobs in the queue.
-#
-# The Size fields are interpreted the same way as with a libcloud NodeSize:
-# http://libcloud.readthedocs.org/en/latest/compute/api.html#libcloud.compute.base.NodeSize
-#
-# See https://cloud.google.com/compute/docs/machine-types for a list
-# of known machine types that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.
-# You can also override Google's provided data fields (such as price per hour)
-# by setting them here.
-
-[Size n1-standard-2]
-cores = 2
-price = 0.076
-scratch = 100
-
-[Size n1-standard-4]
-cores = 4
-price = 0.152
-scratch = 200
\ No newline at end of file
diff --git a/services/nodemanager/doc/local.example.cfg b/services/nodemanager/doc/local.example.cfg
deleted file mode 100644 (file)
index 1221775..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# You can use this configuration to run a development Node Manager for
-# testing.  It uses libcloud's dummy driver and your own development API server.
-# When new cloud nodes are created, you'll need to simulate the ping that
-# they send to the Arvados API server.  The easiest way I've found to do that
-# is through the API server Rails console: load the Node object, set its
-# IP address to 10.10.0.N (where N is the cloud node's ID), and save.
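-#
-# A sketch of that console interaction (model and attribute names are
-# assumed, not verified against the current API server schema):
-#   node = Node.last                                  # record to pair
-#   node.update_attributes!(ip_address: "10.10.0.1")  # here N = 1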
-
-[Manage]
-address = 0.0.0.0
-port = 8989
-
-[Daemon]
-min_nodes = 0
-max_nodes = 8
-poll_time = 15
-max_poll_time = 60
-poll_stale_after = 600
-node_stale_after = 300
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-level = DEBUG
-pykka = DEBUG
-apiclient = WARNING
-
-[Arvados]
-host = localhost:3030
-# This is the token for the text fixture's admin user.
-token = 4axaw8zxe0qm22wa6urpp5nskcne8z88cvbupv653y1njyi05h
-insecure = yes
-timeout = 15
-
-[Cloud]
-provider = dummy
-shutdown_windows = 1, 1
-timeout = 15
-
-[Cloud Credentials]
-creds = dummycreds
-
-[Cloud List]
-[Cloud Create]
-
-[Size 2]
-cores = 4
-scratch = 1234
diff --git a/services/nodemanager/fpm-info.sh b/services/nodemanager/fpm-info.sh
deleted file mode 100644 (file)
index c4a9dbb..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-case "$TARGET" in
-    debian* | ubuntu*)
-        fpm_depends+=(libcurl3-gnutls libpython2.7)
-        ;;
-esac
diff --git a/services/nodemanager/gittaggers.py b/services/nodemanager/gittaggers.py
deleted file mode 120000 (symlink)
index a9ad861..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../sdk/python/gittaggers.py
\ No newline at end of file
diff --git a/services/nodemanager/setup.py b/services/nodemanager/setup.py
deleted file mode 100644 (file)
index 75e8f85..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import
-import os
-import sys
-import re
-
-from setuptools import setup, find_packages
-
-SETUP_DIR = os.path.dirname(__file__) or '.'
-README = os.path.join(SETUP_DIR, 'README.rst')
-
-import arvados_version
-version = arvados_version.get_version(SETUP_DIR, "arvnodeman")
-if os.environ.get('ARVADOS_BUILDING_VERSION', False):
-    pysdk_dep = "=={}".format(version)
-else:
-    # On dev releases, arvados-python-client may have a different timestamp
-    pysdk_dep = "<={}".format(version)
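-# For example (hypothetical version string): a release build with version
-# "1.4.1" pins "arvados-python-client==1.4.1", while a dev build relaxes
-# this to "arvados-python-client<=1.4.1" so an SDK built slightly earlier
-# still satisfies the dependency.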
-
-short_tests_only = False
-if '--short-tests-only' in sys.argv:
-    short_tests_only = True
-    sys.argv.remove('--short-tests-only')
-
-setup(name='arvados-node-manager',
-      version=version,
-      description='Arvados compute node manager',
-      long_description=open(README).read(),
-      author='Arvados',
-      author_email='info@arvados.org',
-      url="https://arvados.org",
-      license='GNU Affero General Public License, version 3.0',
-      packages=find_packages(),
-      scripts=['bin/arvados-node-manager'],
-      data_files=[
-          ('share/doc/arvados-node-manager', ['agpl-3.0.txt', 'README.rst', 'arvados-node-manager.service']),
-      ],
-      install_requires=[
-          'apache-libcloud==2.5.0', # 2.6.0 cannot create azure nodes, #15649
-          'arvados-python-client{}'.format(pysdk_dep),
-          'future',
-          'pykka < 2',
-          'python-daemon',
-          'setuptools',
-          'subprocess32>=3.5.1',
-      ],
-      test_suite='tests',
-      tests_require=[
-          'requests',
-          'pbr<1.7.0',
-          'mock>=1.0',
-          'apache-libcloud==2.5.0',
-          'subprocess32>=3.5.1',
-      ],
-      zip_safe=False,
-)
diff --git a/services/nodemanager/tests/__init__.py b/services/nodemanager/tests/__init__.py
deleted file mode 100644 (file)
index 20e02f9..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-import logging
-import os
-
-# Set the ANMTEST_LOGLEVEL environment variable to enable logging at that level.
-loglevel = os.environ.get('ANMTEST_LOGLEVEL', 'CRITICAL')
-logging.basicConfig(level=getattr(logging, loglevel.upper()))
-
-# Set the ANMTEST_TIMEOUT environment variable to the maximum amount of time to
-# wait for tested actors to respond to important messages.  The default value
-# is very conservative, because a small value may produce false negatives on
-# slower systems.  If you're debugging a known timeout issue, however, you may
-# want to set this lower to speed up tests.
-pykka_timeout = int(os.environ.get('ANMTEST_TIMEOUT', '10'))
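-
-# Example invocation (hypothetical; command-line details may differ):
-#   ANMTEST_LOGLEVEL=debug ANMTEST_TIMEOUT=5 python -m unittest discover tests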
diff --git a/services/nodemanager/tests/fake_azure.cfg.template b/services/nodemanager/tests/fake_azure.cfg.template
deleted file mode 100644 (file)
index e5deac8..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# Azure configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-port = 8989
-
-ManagementToken = xxx
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes.
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on the rate of spending (in $/hr).  Node Manager will not boot
-# additional nodes if the total price of the already-running nodes meets or
-# exceeds this threshold.  The default of 0 means no limit.
-max_total_price = 0
-
-# Poll Azure nodes and Arvados for new information every N seconds.
-poll_time = 0.5
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 1
-
-# If Node Manager can't successfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 1
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 45
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-#file = node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = DEBUG
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = {host}
-token = {token}
-timeout = 15
-jobs_queue = no
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = yes
-
-[Cloud]
-provider = azure
-driver_class = {driver_class}
-
-# Shutdown windows define periods of time when a node may and may not be shut
-# down.  These are windows in full minutes, separated by commas.  Counting from
-# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
-# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
-# and so on.  For example, "20, 999999" means the node may shut down between
-# the 20th and 999999th minutes of uptime.
-# Azure bills by the minute, so it makes sense to aggressively shut down idle
-# nodes.  Specify at least two windows.  You can add as many as you need beyond
-# that.
-shutdown_windows = 0.05, 999999
-
-[Cloud Credentials]
-# Use "azure account list" with the azure CLI to get these values.
-tenant_id = 00000000-0000-0000-0000-000000000000
-subscription_id = 00000000-0000-0000-0000-000000000000
-
-# The following directions are based on
-# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
-#
-# azure config mode arm
-# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YouApplicationUri>" --password <Your_Password>
-# azure ad sp create "<Application_Id>"
-# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/<subscriptionId>/
-#
-# Use <Application_Id> for "key" and the <Your_Password> for "secret"
-#
-key = 00000000-0000-0000-0000-000000000000
-secret = PASSWORD
-timeout = 60
-region = East US
-
-[Cloud List]
-# The resource group in which the compute node virtual machines will be created
-# and listed.
-ex_resource_group = ArvadosResourceGroup
-
-[Cloud Create]
-# The image id, in the form "Publisher:Offer:SKU:Version"
-image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050
-
-# Path to a local ssh key file that will be used to provision new nodes.
-ssh_key = {ssh_key}
-
-# The account name for the admin user that will be provisioned on new nodes.
-ex_user_name = arvadosuser
-
-# The Azure storage account that will be used to store the node OS disk images.
-ex_storage_account = arvadosstorage
-
-# The virtual network the VMs will be associated with.
-ex_network = ArvadosNetwork
-
-# Optional subnet of the virtual network.
-#ex_subnet = default
-
-# Node tags
-tag_arvados-class = dynamic-compute
-tag_cluster = zyxwv
-
-# the API server to ping
-ping_host = {host}
-
-# You can define any number of Size sections to list Azure sizes you're willing
-# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
-# in the queue.  You must also provide the price per hour, as the Azure
-# compute driver currently does not report prices.
-#
-# See https://azure.microsoft.com/en-us/pricing/details/virtual-machines/
-# for a list of known machine types that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.  You can also override Microsoft's provided
-# data fields by setting them here.
-
-[Size Standard_D3]
-cores = 4
-price = 0.56
-
-[Size Standard_D4]
-cores = 8
-price = 1.12
diff --git a/services/nodemanager/tests/fake_ec2.cfg.template b/services/nodemanager/tests/fake_ec2.cfg.template
deleted file mode 100644 (file)
index 2bb7d0e..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# EC2 configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes.
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on the rate of spending (in $/hr).  Node Manager will not boot
-# additional nodes if the total price of the already-running nodes meets or
-# exceeds this threshold.  The default of 0 means no limit.
-max_total_price = 0
-
-# Poll EC2 nodes and Arvados for new information every N seconds.
-poll_time = 0.5
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 1
-
-# If Node Manager can't successfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 1
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 45
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-#file = node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = DEBUG
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = {host}
-token = {token}
-timeout = 15
-jobs_queue = no
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = yes
-
-[Cloud]
-provider = ec2
-driver_class = {driver_class}
-
-# Shutdown windows define periods of time when a node may and may not be shut
-# down.  These are windows in full minutes, separated by commas.  Counting from
-# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
-# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
-# and so on.  For example, "20, 999999" means the node may shut down between
-# the 20th and 999999th minutes of uptime.
-# This test configuration uses a very short first window so that idle nodes
-# become eligible for shutdown almost immediately.  Specify at least two
-# windows.  You can add as many as you need beyond that.
-shutdown_windows = 0.05, 999999
-
-[Cloud Credentials]
-
-key = 00000000-0000-0000-0000-000000000000
-secret = PASSWORD
-timeout = 60
-region = East US
-
-[Cloud List]
-
-[Cloud Create]
-# The image id
-image = fake_image_id
-
-# Path to a local ssh key file that will be used to provision new nodes.
-ssh_key = {ssh_key}
-
-# the API server to ping
-ping_host = {host}
-
-# You can define any number of Size sections to list EC2 sizes you're willing
-# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
-# in the queue.  You must also provide the price per hour, as the driver used
-# in this test configuration does not report prices.
-#
-# See https://aws.amazon.com/ec2/pricing/ for a list of known machine types
-# that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.  You can also override Amazon's provided
-# data fields by setting them here.
-
-[Size m4.xlarge]
-cores = 4
-price = 0.56
-scratch = 250
-
-[Size m4.2xlarge]
-cores = 8
-price = 1.12
-scratch = 500
diff --git a/services/nodemanager/tests/fake_gce.cfg.template b/services/nodemanager/tests/fake_gce.cfg.template
deleted file mode 100644 (file)
index 11131ef..0000000
+++ /dev/null
@@ -1,159 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# GCE configuration for Arvados Node Manager.
-# All times are in seconds unless specified otherwise.
-
-[Manage]
-# The management server responds to http://addr:port/status.json with
-# a snapshot of internal state.
-
-# Management server listening address (default 127.0.0.1)
-#address = 0.0.0.0
-
-# Management server port number (default -1, server is disabled)
-#port = 8989
-
-[Daemon]
-# The dispatcher can customize the start and stop procedure for
-# cloud nodes.  For example, the SLURM dispatcher drains nodes
-# through SLURM before shutting them down.
-#dispatcher = slurm
-
-# Node Manager will ensure that there are at least this many nodes running at
-# all times.  If node manager needs to start new idle nodes for the purpose of
-# satisfying min_nodes, it will use the cheapest node type.  However, depending
-# on usage patterns, it may also satisfy min_nodes by keeping alive some
-# more-expensive nodes.
-min_nodes = 0
-
-# Node Manager will not start any compute nodes when at least this
-# many are running.
-max_nodes = 8
-
-# Upper limit on the rate of spending (in $/hr).  Node Manager will not boot
-# additional nodes if the total price of the already-running nodes meets or
-# exceeds this threshold.  The default of 0 means no limit.
-max_total_price = 0
-
-# Poll GCE nodes and Arvados for new information every N seconds.
-poll_time = 0.5
-
-# Polls have exponential backoff when services fail to respond.
-# This is the longest time to wait between polls.
-max_poll_time = 1
-
-# If Node Manager can't successfully poll a service for this long,
-# it will never start or stop compute nodes, on the assumption that its
-# information is too outdated.
-poll_stale_after = 1
-
-# If Node Manager boots a cloud node, and it does not pair with an Arvados
-# node before this long, assume that there was a cloud bootstrap failure and
-# shut it down.  Note that normal shutdown windows apply (see the Cloud
-# section), so this should be shorter than the first shutdown window value.
-boot_fail_after = 45
-
-# "Node stale time" affects two related behaviors.
-# 1. If a compute node has been running for at least this long, but it
-# isn't paired with an Arvados node, do not shut it down, but leave it alone.
-# This prevents the node manager from shutting down a node that might
-# actually be doing work, but is having temporary trouble contacting the
-# API server.
-# 2. When the Node Manager starts a new compute node, it will try to reuse
-# an Arvados node that hasn't been updated for this long.
-node_stale_after = 14400
-
-# Scaling factor to be applied to nodes' available RAM size. Usually there's a
-# variable discrepancy between the advertised RAM value on cloud nodes and the
-# actual amount available.
-# If not set, this value will be set to 0.95
-node_mem_scaling = 0.95
-
-# File path for Certificate Authorities
-certs_file = /etc/ssl/certs/ca-certificates.crt
-
-[Logging]
-# Log file path
-#file = node-manager.log
-
-# Log level for most Node Manager messages.
-# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
-# WARNING lets you know when polling a service fails.
-# INFO additionally lets you know when a compute node is started or stopped.
-level = DEBUG
-
-# You can also set different log levels for specific libraries.
-# Pykka is the Node Manager's actor library.
-# Setting this to DEBUG will display tracebacks for uncaught
-# exceptions in the actors, but it's also very chatty.
-pykka = WARNING
-
-# Setting apiclient to INFO will log the URL of every Arvados API request.
-apiclient = WARNING
-
-[Arvados]
-host = {host}
-token = {token}
-timeout = 15
-jobs_queue = no
-
-# Accept an untrusted SSL certificate from the API server?
-insecure = yes
-
-[Cloud]
-provider = gce
-driver_class = {driver_class}
-
-# Shutdown windows define periods of time when a node may and may not be shut
-# down.  These are windows in full minutes, separated by commas.  Counting from
-# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
-# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
-# and so on.  For example, "20, 999999" means the node may shut down between
-# the 20th and 999999th minutes of uptime.
-# This test configuration uses a very short first window so that idle nodes
-# become eligible for shutdown almost immediately.  Specify at least two
-# windows.  You can add as many as you need beyond that.
-shutdown_windows = 0.05, 999999
-
-[Cloud Credentials]
-key = 00000000-0000-0000-0000-000000000000
-secret = PASSWORD
-timeout = 60
-region = East US
-
-[Cloud List]
-
-[Cloud Create]
-# The image id
-image = fake_image_id
-
-# Path to a local ssh key file that will be used to provision new nodes.
-ssh_key = {ssh_key}
-
-# the API server to ping
-ping_host = {host}
-
-# You can define any number of Size sections to list GCE sizes you're willing
-# to use.  The Node Manager should boot the cheapest size(s) that can run jobs
-# in the queue.  You must also provide the price per hour, as the driver used
-# in this test configuration does not report prices.
-#
-# See https://cloud.google.com/compute/docs/machine-types for a list
-# of known machine types that may be used as a Size parameter.
-#
-# Each size section MUST define the number of cores available in this
-# size class (since libcloud does not provide any consistent API for exposing
-# this setting).
-# You may also want to define the amount of scratch space (expressed
-# in GB) for Crunch jobs.  You can also override Google's provided
-# data fields by setting them here.
-
-[Size n1-standard-1]
-cores = 1
-price = 0.56
-
-[Size n1-standard-2]
-cores = 2
-price = 1.12
\ No newline at end of file
diff --git a/services/nodemanager/tests/integration_test.py b/services/nodemanager/tests/integration_test.py
deleted file mode 100755 (executable)
index 1ba2957..0000000
+++ /dev/null
@@ -1,494 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-"""Integration test framework for node manager.
-
-Runs full node manager with an API server (needs ARVADOS_API_HOST and
-ARVADOS_API_TOKEN).  Stubs out the cloud driver and slurm commands to mock
-specific behaviors.  Monitors the log output to verify an expected sequence of
-events or behaviors for each test.
-
-"""
-
-import subprocess32 as subprocess
-import os
-import sys
-import re
-import time
-import logging
-import stat
-import tempfile
-import shutil
-import errno
-from functools import partial
-import arvados
-import StringIO
-
-formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
-
-handler = logging.StreamHandler(sys.stderr)
-handler.setFormatter(formatter)
-logger = logging.getLogger("logger")
-logger.setLevel(logging.INFO)
-logger.addHandler(handler)
-
-detail = logging.getLogger("detail")
-detail.setLevel(logging.INFO)
-if os.environ.get("ANMTEST_LOGLEVEL"):
-    detail_content = sys.stderr
-else:
-    detail_content = StringIO.StringIO()
-handler = logging.StreamHandler(detail_content)
-handler.setFormatter(formatter)
-detail.addHandler(handler)
-
-fake_slurm = None
-compute_nodes = None
-all_jobs = None
-unsatisfiable_job_scancelled = None
-
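-# update_script writes the new contents to "<path>_" and then os.rename()s
-# it over the original; on POSIX the rename is atomic, so node manager never
-# observes a half-written fake slurm command.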
-def update_script(path, val):
-    with open(path+"_", "w") as f:
-        f.write(val)
-    os.chmod(path+"_", stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR)
-    os.rename(path+"_", path)
-    detail.info("Update script %s: %s", path, val)
-
-def set_squeue(g):
-    global all_jobs
-    update_script(os.path.join(fake_slurm, "squeue"), "#!/bin/sh\n" +
-                  "\n".join("echo '1|100|100|%s|%s|(null)|1234567890'" % (v, k) for k,v in all_jobs.items()))
-    return 0
-
-def set_queue_unsatisfiable(g):
-    global all_jobs, unsatisfiable_job_scancelled
-    # Simulate a job requesting a 99 core node.
-    update_script(os.path.join(fake_slurm, "squeue"), "#!/bin/sh\n" +
-                  "\n".join("echo '99|100|100|%s|%s|(null)|1234567890'" % (v, k) for k,v in all_jobs.items()))
-    update_script(os.path.join(fake_slurm, "scancel"), "#!/bin/sh\n" +
-                  "\ntouch %s" % unsatisfiable_job_scancelled)
-    return 0
-
-def job_cancelled(g):
-    global unsatisfiable_job_scancelled
-    cancelled_job = g.group(1)
-    api = arvados.api('v1')
-    # Check that 'scancel' was called
-    if not os.path.isfile(unsatisfiable_job_scancelled):
-        return 1
-    # Check for the log entry
-    log_entry = api.logs().list(
-        filters=[
-            ['object_uuid', '=', cancelled_job],
-            ['event_type', '=', 'stderr'],
-        ]).execute()['items'][0]
-    if not re.match(
-            r"Constraints cannot be satisfied",
-            log_entry['properties']['text']):
-        return 1
-    return 0
-
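-# node_paired records the cloud node -> hostname mapping captured from the
-# "is now paired" log line, rewrites the fake sinfo output so the new host
-# reports as allocated, and flips one queued job from ReqNodeNotAvail to
-# Running before refreshing the fake squeue.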
-def node_paired(g):
-    global compute_nodes
-    compute_nodes[g.group(1)] = g.group(3)
-
-    update_script(os.path.join(fake_slurm, "sinfo"), "#!/bin/sh\n" +
-                  "\n".join("echo '%s|alloc|(null)'" % (v) for k,v in compute_nodes.items()))
-
-    for k,v in all_jobs.items():
-        if v == "ReqNodeNotAvail":
-            all_jobs[k] = "Running"
-            break
-
-    set_squeue(g)
-
-    return 0
-
-def node_busy(g):
-    update_script(os.path.join(fake_slurm, "sinfo"), "#!/bin/sh\n" +
-                  "\n".join("echo '%s|idle|(null)'" % (v) for k,v in compute_nodes.items()))
-    return 0
-
-def node_shutdown(g):
-    global compute_nodes
-    if g.group(1) in compute_nodes:
-        del compute_nodes[g.group(1)]
-        return 0
-    else:
-        return 1
-
-
-def jobs_req(g):
-    global all_jobs
-    for k,v in all_jobs.items():
-        all_jobs[k] = "ReqNodeNotAvail"
-    set_squeue(g)
-    return 0
-
-def noop(g):
-    return 0
-
-def fail(checks, pattern, g):
-    return 1
-
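-# expect_count lets a log pattern match at most `count` times: each match
-# re-registers the check with count-1, and any match after the budget is
-# exhausted returns 1, which run_test counts as a failure.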
-def expect_count(count, checks, pattern, g):
-    if count == 0:
-        return 1
-    else:
-        checks[pattern] = partial(expect_count, count-1)
-        return 0
-
-def run_test(name, actions, checks, driver_class, jobs, provider):
-    code = 0
-    global unsatisfiable_job_scancelled
-    unsatisfiable_job_scancelled = os.path.join(tempfile.mkdtemp(),
-                                                "scancel_called")
-
-    # Delete any stale node records
-    api = arvados.api('v1')
-    for n in api.nodes().list().execute()['items']:
-        api.nodes().delete(uuid=n["uuid"]).execute()
-
-    logger.info("Start %s", name)
-
-    global fake_slurm
-    fake_slurm = tempfile.mkdtemp()
-    detail.info("fake_slurm is %s", fake_slurm)
-
-    global compute_nodes
-    compute_nodes = {}
-
-    global all_jobs
-    all_jobs = jobs
-
-    env = os.environ.copy()
-    env["PATH"] = fake_slurm + ":" + env["PATH"]
-
-    # Reset fake squeue/sinfo to empty
-    update_script(os.path.join(fake_slurm, "squeue"), "#!/bin/sh\n")
-    update_script(os.path.join(fake_slurm, "sinfo"), "#!/bin/sh\n")
-
-    # Write configuration file for test
-    with open("tests/fake_%s.cfg.template" % provider) as f:
-        open(os.path.join(fake_slurm, "id_rsa.pub"), "w").close()
-        with open(os.path.join(fake_slurm, "fake.cfg"), "w") as cfg:
-            cfg.write(f.read().format(host=os.environ["ARVADOS_API_HOST"],
-                                      token=os.environ["ARVADOS_API_TOKEN"],
-                                      driver_class=driver_class,
-                                      ssh_key=os.path.join(fake_slurm, "id_rsa.pub")))
-
-    # Tests must complete in less than 30 seconds.
-    timeout = time.time() + 30
-    terminated = False
-
-    # Now start node manager
-    p = subprocess.Popen(["bin/arvados-node-manager", "--foreground", "--config", os.path.join(fake_slurm, "fake.cfg")],
-                         bufsize=0, stderr=subprocess.PIPE, env=env)
-
-    # Test main loop:
-    # - Read line
-    # - Apply negative checks (things that are not supposed to happen)
-    # - Check timeout
-    # - Check if the next action should trigger
-    # - If all actions are exhausted, terminate with test success
-    # - If it hits timeout with actions remaining, terminate with test failed
-    try:
-        # naive line iteration over pipes gets buffered, which isn't what we want,
-        # see https://bugs.python.org/issue3907
-        for line in iter(p.stderr.readline, ""):
-            detail_content.write(line)
-
-            for k,v in checks.items():
-                g = re.match(k, line)
-                if g:
-                    detail.info("Matched check %s", k)
-                    code += v(checks, k, g)
-                    if code != 0:
-                        detail.error("Check failed")
-                        if not terminated:
-                            p.kill()
-                            terminated = True
-
-            if terminated:
-                continue
-
-            if time.time() > timeout:
-                detail.error("Exceeded timeout with actions remaining: %s", actions)
-                code += 1
-                if not terminated:
-                    p.kill()
-                    terminated = True
-
-            k, v = actions[0]
-            g = re.match(k, line)
-            if g:
-                detail.info("Matched action %s", k)
-                actions.pop(0)
-                code += v(g)
-                if code != 0:
-                    detail.error("Action failed")
-                    p.kill()
-                    terminated = True
-
-            if not actions:
-                p.kill()
-                terminated = True
-    except KeyboardInterrupt:
-        p.kill()
-
-    if actions:
-        detail.error("Ended with remaining actions: %s", actions)
-        code = 1
-
-    shutil.rmtree(fake_slurm)
-    shutil.rmtree(os.path.dirname(unsatisfiable_job_scancelled))
-
-    if code == 0:
-        logger.info("%s passed", name)
-    else:
-        if isinstance(detail_content, StringIO.StringIO):
-            detail_content.seek(0)
-            chunk = detail_content.read(4096)
-            while chunk:
-                try:
-                    sys.stderr.write(chunk)
-                    chunk = detail_content.read(4096)
-                except IOError as e:
-                    if e.errno == errno.EAGAIN:
-                        # try again (probably pipe buffer full)
-                        pass
-                    else:
-                        raise
-        logger.info("%s failed", name)
-
-    return code
-
-
-def main():
-    # Test lifecycle.
-
-    tests = {
-        "test_unsatisfiable_jobs" : (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_queue_unsatisfiable),
-                (r".*Cancelled unsatisfiable job '(\S+)'", job_cancelled),
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": fail,
-                r".*Trying to cancel job '(\S+)'": fail,
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.FakeDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf9": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_single_node_azure": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Suggesting shutdown because node state is \('down', .*\)": fail,
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 1),
-                r".*Setting node quota.*": fail,
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.FakeDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf9": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_multiple_nodes": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 4),
-                r".*Setting node quota.*": fail,
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.FakeDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf1": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf2": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf3": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf4": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_hit_quota": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown)
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 2),
-                r".*Sending create_node request.*": partial(expect_count, 5)
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.QuotaDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf1": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf2": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf3": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf4": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_probe_quota": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*sending request", jobs_req),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 6),
-                r".*Sending create_node request.*": partial(expect_count, 9)
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.QuotaDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf1": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf2": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf3": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf4": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_no_hang_failing_node_create": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Client error: nope", noop),
-                (r".*Client error: nope", noop),
-                (r".*Client error: nope", noop),
-                (r".*Client error: nope", noop),
-            ],
-            # Checks (things that shouldn't happen)
-            {},
-            # Driver class
-            "arvnodeman.test.fake_driver.FailingDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf1": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf2": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf3": "ReqNodeNotAvail",
-             "34t0i-dz642-h42bg3hq4bdfpf4": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_retry_create": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Rate limit exceeded - scheduling retry in 2 seconds", noop),
-                (r".*Rate limit exceeded - scheduling retry in 1 seconds", noop),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", noop),
-            ],
-            # Checks (things that shouldn't happen)
-            {},
-            # Driver class
-            "arvnodeman.test.fake_driver.RetryDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf1": "ReqNodeNotAvail"},
-            # Provider
-            "azure"),
-        "test_single_node_aws": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 1),
-                r".*Setting node quota.*": fail,
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.FakeAwsDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf9": "ReqNodeNotAvail"},
-            # Provider
-            "ec2"),
-        "test_single_node_gce": (
-            # Actions (pattern -> action)
-            [
-                (r".*Daemon started", set_squeue),
-                (r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)", node_paired),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Not eligible for shut down because node state is \('busy', 'open', .*\)", node_busy),
-                (r".*ComputeNodeMonitorActor\..*\.([^[]*).*Suggesting shutdown because node state is \('idle', 'open', .*\)", noop),
-                (r".*ComputeNodeShutdownActor\..*\.([^[]*).*Shutdown success", node_shutdown),
-            ],
-            # Checks (things that shouldn't happen)
-            {
-                r".*Cloud node (\S+) is now paired with Arvados node (\S+) with hostname (\S+)": partial(expect_count, 1),
-                r".*Setting node quota.*": fail,
-            },
-            # Driver class
-            "arvnodeman.test.fake_driver.FakeGceDriver",
-            # Jobs
-            {"34t0i-dz642-h42bg3hq4bdfpf9": "ReqNodeNotAvail"},
-            # Provider
-            "gce")
-    }
-
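-    # Run the single test named on the command line, or run the whole suite.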
-    code = 0
-    if len(sys.argv) > 1:
-        code = run_test(sys.argv[1], *tests[sys.argv[1]])
-    else:
-        for t in sorted(tests.keys()):
-            code += run_test(t, *tests[t])
-
-    if code == 0:
-        logger.info("Tests passed")
-    else:
-        logger.info("Tests failed")
-
-    exit(code)
-
-if __name__ == '__main__':
-    main()
diff --git a/services/nodemanager/tests/stress_test.cwl b/services/nodemanager/tests/stress_test.cwl
deleted file mode 100644 (file)
index 082df64..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-#
-#
-# Usage: arvados-cwl-runner stress_test.cwl
-#
-# Submits 100 jobs or containers, creating load on node manager and
-# scheduler.
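-# step1 emits the integers 1..100; step2 scatters over them, sleeping (101-num)*2 seconds each.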
-
-class: Workflow
-cwlVersion: v1.0
-requirements:
-  ScatterFeatureRequirement: {}
-  InlineJavascriptRequirement: {}
-inputs: []
-outputs: []
-steps:
-  step1:
-    in: []
-    out: [out]
-    run:
-      class: ExpressionTool
-      inputs: []
-      outputs:
-        out: int[]
-      expression: |
-        ${
-          var r = [];
-          for (var i = 1; i <= 100; i++) {
-            r.push(i);
-          }
-          return {out: r};
-        }
-  step2:
-    in:
-      num: step1/out
-    out: []
-    scatter: num
-    run:
-      class: CommandLineTool
-      requirements:
-        ShellCommandRequirement: {}
-      inputs:
-        num: int
-      outputs: []
-      arguments: [echo, "starting",
-        {shellQuote: false, valueFrom: "&&"},
-        sleep, $((101-inputs.num)*2),
-        {shellQuote: false, valueFrom: "&&"},
-        echo, "the number of the day is", $(inputs.num)]
diff --git a/services/nodemanager/tests/test_arguments.py b/services/nodemanager/tests/test_arguments.py
deleted file mode 100644 (file)
index e325e52..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-import io
-import os
-import sys
-import tempfile
-import unittest
-
-import arvnodeman.launcher as nodeman
-from . import testutil
-
-class ArvNodemArgumentsTestCase(unittest.TestCase):
-    def run_nodeman(self, args):
-        return nodeman.main(args)
-
-    def test_unsupported_arg(self):
-        with self.assertRaises(SystemExit):
-            self.run_nodeman(['-x=unknown'])
-
-    def test_version_argument(self):
-        err = io.BytesIO()
-        out = io.BytesIO()
-        with testutil.redirected_streams(stdout=out, stderr=err):
-            with self.assertRaises(SystemExit):
-                self.run_nodeman(['--version'])
-        self.assertEqual(out.getvalue(), '')
-        self.assertRegexpMatches(err.getvalue(), r"[0-9]+\.[0-9]+\.[0-9]+")
diff --git a/services/nodemanager/tests/test_clientactor.py b/services/nodemanager/tests/test_clientactor.py
deleted file mode 100644 (file)
index 19e094d..0000000
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import unittest
-
-import mock
-import pykka
-
-import arvnodeman.clientactor as clientactor
-from . import testutil
-
-class RemotePollLoopActorTestCase(testutil.RemotePollLoopActorTestMixin,
-                                  unittest.TestCase):
-    class MockClientError(Exception):
-        pass
-
-    class TestActor(clientactor.RemotePollLoopActor):
-        LOGGER_NAME = 'arvnodeman.testpoll'
-
-        def _send_request(self):
-            return self._client()
-    TestActor.CLIENT_ERRORS = (MockClientError,)
-    TEST_CLASS = TestActor
-
-
-    def build_monitor(self, side_effect, *args, **kwargs):
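-        # Each side_effect element is returned (or raised) by one poll request.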
-        super(RemotePollLoopActorTestCase, self).build_monitor(*args, **kwargs)
-        self.client.side_effect = side_effect
-
-    def test_poll_loop_starts_after_subscription(self):
-        self.build_monitor(['test1'])
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with('test1')
-        self.assertTrue(self.timer.schedule.called)
-
-    def test_poll_loop_continues_after_failure(self):
-        self.build_monitor(self.MockClientError)
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.assertTrue(self.stop_proxy(self.monitor),
-                        "poll loop died after error")
-        self.assertTrue(self.timer.schedule.called,
-                        "poll loop did not reschedule after error")
-        self.assertFalse(self.subscriber.called,
-                         "poll loop notified subscribers after error")
-
-    def test_late_subscribers_get_responses(self):
-        self.build_monitor(['pre_late_test', 'late_test'])
-        mock_subscriber = mock.Mock(name='mock_subscriber')
-        self.monitor.subscribe(mock_subscriber).get(self.TIMEOUT)
-        self.monitor.subscribe(self.subscriber)
-        self.monitor.poll().get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with('late_test')
-
-    def test_survive_dead_subscriptions(self):
-        self.build_monitor(['survive1', 'survive2'])
-        dead_subscriber = mock.Mock(name='dead_subscriber')
-        dead_subscriber.side_effect = pykka.ActorDeadError
-        self.monitor.subscribe(dead_subscriber)
-        self.monitor.subscribe(self.subscriber)
-        self.monitor.poll().get(self.TIMEOUT)
-        self.assertTrue(self.stop_proxy(self.monitor),
-                        "poll loop died from dead subscriber")
-        self.subscriber.assert_called_with('survive2')
-
-    def check_poll_timers(self, *test_times):
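-        # test_times is a sequence of (fake_now, expected_next_poll_time) pairs.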
-        schedule_mock = self.timer.schedule
-        last_expect = None
-        with mock.patch('time.time') as time_mock:
-            for fake_time, expect_next in test_times:
-                time_mock.return_value = fake_time
-                self.monitor.poll(last_expect).get(self.TIMEOUT)
-                self.assertTrue(schedule_mock.called)
-                self.assertEqual(expect_next, schedule_mock.call_args[0][0])
-                schedule_mock.reset_mock()
-                last_expect = expect_next
-
-    def test_poll_timing_on_consecutive_successes_with_drift(self):
-        self.build_monitor(['1', '2'], poll_wait=3, max_poll_wait=14)
-        self.check_poll_timers((0, 3), (4, 6))
-
-    def test_poll_backoff_on_failures(self):
-        self.build_monitor(self.MockClientError, poll_wait=3, max_poll_wait=14)
-        self.check_poll_timers((0, 6), (6, 18), (18, 32))
-
-    def test_poll_timing_after_error_recovery(self):
-        self.build_monitor(['a', self.MockClientError(), 'b'],
-                           poll_wait=3, max_poll_wait=14)
-        self.check_poll_timers((0, 3), (4, 10), (10, 13))
-
-    def test_no_subscriptions_by_key_without_support(self):
-        self.build_monitor([])
-        with self.assertRaises(AttributeError):
-            self.monitor.subscribe_to('key')
-
-
-class RemotePollLoopActorWithKeysTestCase(testutil.RemotePollLoopActorTestMixin,
-                                          unittest.TestCase):
-    class TestActor(RemotePollLoopActorTestCase.TestActor):
-        def _item_key(self, item):
-            return item['key']
-    TEST_CLASS = TestActor
-
-
-    def build_monitor(self, side_effect, *args, **kwargs):
-        super(RemotePollLoopActorWithKeysTestCase, self).build_monitor(
-            *args, **kwargs)
-        self.client.side_effect = side_effect
-
-    def test_key_subscription(self):
-        self.build_monitor([[{'key': 1}, {'key': 2}]])
-        self.monitor.subscribe_to(2, self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with({'key': 2})
-
-    def test_survive_dead_key_subscriptions(self):
-        item = {'key': 3}
-        self.build_monitor([[item], [item]])
-        dead_subscriber = mock.Mock(name='dead_subscriber')
-        dead_subscriber.side_effect = pykka.ActorDeadError
-        self.monitor.subscribe_to(3, dead_subscriber)
-        self.monitor.subscribe_to(3, self.subscriber)
-        self.monitor.poll().get(self.TIMEOUT)
-        self.assertTrue(self.stop_proxy(self.monitor),
-                        "poll loop died from dead key subscriber")
-        self.subscriber.assert_called_with(item)
-
-    def test_mixed_subscriptions(self):
-        item = {'key': 4}
-        self.build_monitor([[item], [item]])
-        key_subscriber = mock.Mock(name='key_subscriber')
-        self.monitor.subscribe(self.subscriber)
-        self.monitor.subscribe_to(4, key_subscriber)
-        self.monitor.poll().get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with([item])
-        key_subscriber.assert_called_with(item)
-
-    def test_subscription_to_missing_key(self):
-        self.build_monitor([[]])
-        self.monitor.subscribe_to('nonesuch', self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with(None)
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/services/nodemanager/tests/test_computenode.py b/services/nodemanager/tests/test_computenode.py
deleted file mode 100644 (file)
index 898112b..0000000
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import time
-import unittest
-
-import arvados.errors as arverror
-import mock
-
-import arvnodeman.computenode as cnode
-from . import testutil
-
-@mock.patch('time.time', return_value=1)
-class ShutdownTimerTestCase(unittest.TestCase):
-    def test_two_length_window(self, time_mock):
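-        # Window lengths are in minutes: [8, 2] keeps the window closed for 480s, then open for 120s.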
-        timer = cnode.ShutdownTimer(time_mock.return_value, [8, 2])
-        self.assertEqual(481, timer.next_opening())
-        self.assertFalse(timer.window_open())
-        time_mock.return_value += 500
-        self.assertEqual(1081, timer.next_opening())
-        self.assertTrue(timer.window_open())
-        time_mock.return_value += 200
-        self.assertEqual(1081, timer.next_opening())
-        self.assertFalse(timer.window_open())
-
-    def test_three_length_window(self, time_mock):
-        timer = cnode.ShutdownTimer(time_mock.return_value, [6, 3, 1])
-        self.assertEqual(361, timer.next_opening())
-        self.assertFalse(timer.window_open())
-        time_mock.return_value += 400
-        self.assertEqual(961, timer.next_opening())
-        self.assertTrue(timer.window_open())
-        time_mock.return_value += 200
-        self.assertEqual(961, timer.next_opening())
-        self.assertFalse(timer.window_open())
-
-
-class ArvadosTimestamp(unittest.TestCase):
-    def test_arvados_timestamp(self):
-        self.assertEqual(1527710178, cnode.arvados_timestamp('2018-05-30T19:56:18Z'))
-        self.assertEqual(1527710178.999371, cnode.arvados_timestamp('2018-05-30T19:56:18.999371Z'))
diff --git a/services/nodemanager/tests/test_computenode_dispatch.py b/services/nodemanager/tests/test_computenode_dispatch.py
deleted file mode 100644 (file)
index aee3cbd..0000000
+++ /dev/null
@@ -1,565 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import time
-import unittest
-
-import arvados.errors as arverror
-import httplib2
-import mock
-import pykka
-import threading
-
-from libcloud.common.exceptions import BaseHTTPError
-
-import arvnodeman.computenode.dispatch as dispatch
-import arvnodeman.status as status
-from arvnodeman.computenode.driver import BaseComputeNodeDriver
-from . import testutil
-
-class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
-    ACTOR_CLASS = dispatch.ComputeNodeSetupActor
-
-    def make_mocks(self, arvados_effect=None):
-        if arvados_effect is None:
-            arvados_effect = [testutil.arvados_node_mock(
-                slot_number=None,
-                hostname=None,
-                first_ping_at=None,
-                last_ping_at=None,
-            )]
-        self.arvados_effect = arvados_effect
-        self.timer = testutil.MockTimer()
-        self.api_client = mock.MagicMock(name='api_client')
-        self.api_client.nodes().create().execute.side_effect = arvados_effect
-        self.api_client.nodes().update().execute.side_effect = arvados_effect
-        self.cloud_client = mock.MagicMock(name='cloud_client')
-        self.cloud_client.create_node.return_value = testutil.cloud_node_mock(1)
-
-    def make_actor(self, arv_node=None):
-        if not hasattr(self, 'timer'):
-            self.make_mocks(arvados_effect=[arv_node] if arv_node else None)
-        self.setup_actor = self.ACTOR_CLASS.start(
-            self.timer, self.api_client, self.cloud_client,
-            testutil.MockSize(1), arv_node).proxy()
-
-    def assert_node_properties_updated(self, uuid=None,
-                                       size=testutil.MockSize(1)):
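-        # The setup actor must copy the cloud size id and price into the node's properties.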
-        self.api_client.nodes().update.assert_any_call(
-            uuid=(uuid or self.arvados_effect[-1]['uuid']),
-            body={
-                'properties': {
-                    'cloud_node': {
-                        'size': size.id,
-                        'price': size.price}}})
-
-    def test_creation_without_arvados_node(self):
-        self.make_actor()
-        finished = threading.Event()
-        self.setup_actor.subscribe(lambda _: finished.set())
-        self.assertEqual(self.arvados_effect[-1],
-                         self.setup_actor.arvados_node.get(self.TIMEOUT))
-        self.assertTrue(finished.wait(self.TIMEOUT))
-        self.api_client.nodes().create.assert_called_with(body={}, assign_slot=True)
-        self.assertEqual(1, self.api_client.nodes().create().execute.call_count)
-        self.assertEqual(1, self.api_client.nodes().update().execute.call_count)
-        self.assert_node_properties_updated()
-        self.assertEqual(self.cloud_client.create_node(),
-                         self.setup_actor.cloud_node.get(self.TIMEOUT))
-
-    def test_creation_with_arvados_node(self):
-        self.make_mocks(arvados_effect=[testutil.arvados_node_mock()]*2)
-        self.make_actor(testutil.arvados_node_mock())
-        finished = threading.Event()
-        self.setup_actor.subscribe(lambda _: finished.set())
-        self.assertEqual(self.arvados_effect[-1],
-                         self.setup_actor.arvados_node.get(self.TIMEOUT))
-        self.assertTrue(finished.wait(self.TIMEOUT))
-        self.assert_node_properties_updated()
-        self.api_client.nodes().create.called_with(body={}, assign_slot=True)
-        self.assertEqual(3, self.api_client.nodes().update().execute.call_count)
-        self.assertEqual(self.cloud_client.create_node(),
-                         self.setup_actor.cloud_node.get(self.TIMEOUT))
-
-    def test_failed_arvados_calls_retried(self):
-        self.make_mocks([
-                arverror.ApiError(httplib2.Response({'status': '500'}), ""),
-                testutil.arvados_node_mock(),
-                ])
-        self.make_actor()
-        self.wait_for_assignment(self.setup_actor, 'arvados_node')
-
-    def test_failed_cloud_calls_retried(self):
-        self.make_mocks()
-        self.cloud_client.create_node.side_effect = [
-            Exception("test cloud creation error"),
-            self.cloud_client.create_node.return_value,
-            ]
-        self.make_actor()
-        self.wait_for_assignment(self.setup_actor, 'cloud_node')
-
-    def test_basehttperror_retried(self):
-        self.make_mocks()
-        self.cloud_client.create_node.side_effect = [
-            BaseHTTPError(500, "Try again"),
-            self.cloud_client.create_node.return_value,
-            ]
-        self.make_actor()
-        self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.setup_actor.ping().get(self.TIMEOUT)
-        self.assertEqual(1, self.cloud_client.post_create_node.call_count)
-
-    def test_instance_exceeded_not_retried(self):
-        self.make_mocks()
-        self.cloud_client.create_node.side_effect = [
-            BaseHTTPError(400, "InstanceLimitExceeded"),
-            self.cloud_client.create_node.return_value,
-            ]
-        self.make_actor()
-        done = self.FUTURE_CLASS()
-        self.setup_actor.subscribe(done.set)
-        done.get(self.TIMEOUT)
-        self.assertEqual(0, self.cloud_client.post_create_node.call_count)
-
-    def test_failed_post_create_retried(self):
-        self.make_mocks()
-        self.cloud_client.post_create_node.side_effect = [
-            Exception("test cloud post-create error"), None]
-        self.make_actor()
-        done = self.FUTURE_CLASS()
-        self.setup_actor.subscribe(done.set)
-        done.get(self.TIMEOUT)
-        self.assertEqual(2, self.cloud_client.post_create_node.call_count)
-
-    def test_stop_when_no_cloud_node(self):
-        self.make_mocks(
-            arverror.ApiError(httplib2.Response({'status': '500'}), ""))
-        self.make_actor()
-        self.assertTrue(
-            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
-        self.assertTrue(
-            self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))
-
-    def test_no_stop_when_cloud_node(self):
-        self.make_actor()
-        self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.assertFalse(
-            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
-        self.assertTrue(self.stop_proxy(self.setup_actor),
-                        "actor was stopped by stop_if_no_cloud_node")
-
-    def test_subscribe(self):
-        self.make_mocks(
-            arverror.ApiError(httplib2.Response({'status': '500'}), ""))
-        self.make_actor()
-        subscriber = mock.Mock(name='subscriber_mock')
-        self.setup_actor.subscribe(subscriber)
-        retry_resp = [testutil.arvados_node_mock()]
-        self.api_client.nodes().create().execute.side_effect = retry_resp
-        self.api_client.nodes().update().execute.side_effect = retry_resp
-        self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.setup_actor.ping().get(self.TIMEOUT)
-        self.assertEqual(self.setup_actor.actor_ref.actor_urn,
-                         subscriber.call_args[0][0].actor_ref.actor_urn)
-
-    def test_late_subscribe(self):
-        self.make_actor()
-        subscriber = mock.Mock(name='subscriber_mock')
-        self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.setup_actor.subscribe(subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.setup_actor)
-        self.assertEqual(self.setup_actor.actor_ref.actor_urn,
-                         subscriber.call_args[0][0].actor_ref.actor_urn)
-
-
-class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
-    def make_mocks(self, cloud_node=None, arvados_node=None,
-                   shutdown_open=True, node_broken=False):
-        self.timer = testutil.MockTimer()
-        self.shutdowns = testutil.MockShutdownTimer()
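-        # _set_state(window_open, next_opening) drives the mocked shutdown window.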
-        self.shutdowns._set_state(shutdown_open, 300)
-        self.cloud_client = mock.MagicMock(name='cloud_client')
-        self.cloud_client.broken.return_value = node_broken
-        self.arvados_client = mock.MagicMock(name='arvados_client')
-        self.updates = mock.MagicMock(name='update_mock')
-        if cloud_node is None:
-            cloud_node = testutil.cloud_node_mock()
-        self.cloud_node = cloud_node
-        self.arvados_node = arvados_node
-
-    def make_actor(self, cancellable=True, start_time=None):
-        if not hasattr(self, 'timer'):
-            self.make_mocks()
-        if start_time is None:
-            start_time = time.time()
-        monitor_actor = dispatch.ComputeNodeMonitorActor.start(
-            self.cloud_node, start_time, self.shutdowns,
-            self.timer, self.updates, self.cloud_client,
-            self.arvados_node)
-        self.shutdown_actor = self.ACTOR_CLASS.start(
-            self.timer, self.cloud_client, self.arvados_client, monitor_actor,
-            cancellable).proxy()
-        self.monitor_actor = monitor_actor.proxy()
-
-    def check_success_flag(self, expected, allow_msg_count=1):
-        # allow_msg_count is the number of internal messages that may
-        # need to be handled for shutdown to finish.
-        for _ in range(1 + allow_msg_count):
-            last_flag = self.shutdown_actor.success.get(self.TIMEOUT)
-            if last_flag is expected:
-                break
-        else:
-            self.fail("success flag {} is not {}".format(last_flag, expected))
-
-    def test_boot_failure_counting(self, *mocks):
-        # A boot failure happens when a node transitions from unpaired to shutdown
-        status.tracker.update({'boot_failures': 0})
-        self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="unpaired"))
-        self.cloud_client.destroy_node.return_value = True
-        self.make_actor(cancellable=False)
-        self.check_success_flag(True, 2)
-        self.assertTrue(self.cloud_client.destroy_node.called)
-        self.assertEqual(1, status.tracker.get('boot_failures'))
-
-    def test_cancellable_shutdown(self, *mocks):
-        self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
-        self.cloud_client.destroy_node.return_value = True
-        self.make_actor(cancellable=True)
-        self.check_success_flag(False, 2)
-        self.assertFalse(self.cloud_client.destroy_node.called)
-
-    def test_uncancellable_shutdown(self, *mocks):
-        status.tracker.update({'boot_failures': 0})
-        self.make_mocks(shutdown_open=True, arvados_node=testutil.arvados_node_mock(crunch_worker_state="busy"))
-        self.cloud_client.destroy_node.return_value = True
-        self.make_actor(cancellable=False)
-        self.check_success_flag(True, 4)
-        self.assertTrue(self.cloud_client.destroy_node.called)
-        # A normal shutdown shouldn't be counted as boot failure
-        self.assertEqual(0, status.tracker.get('boot_failures'))
-
-    def test_arvados_node_cleaned_after_shutdown(self, *mocks):
-        if len(mocks) == 1:
-            mocks[0].return_value = "drain\n"
-        cloud_node = testutil.cloud_node_mock(62)
-        arv_node = testutil.arvados_node_mock(62)
-        self.make_mocks(cloud_node, arv_node)
-        self.make_actor()
-        self.check_success_flag(True, 3)
-        update_mock = self.arvados_client.nodes().update
-        self.assertTrue(update_mock.called)
-        update_kwargs = update_mock.call_args_list[0][1]
-        self.assertEqual(arv_node['uuid'], update_kwargs.get('uuid'))
-        self.assertIn('body', update_kwargs)
-        for clear_key in ['slot_number', 'hostname', 'ip_address',
-                          'first_ping_at', 'last_ping_at']:
-            self.assertIn(clear_key, update_kwargs['body'])
-            self.assertIsNone(update_kwargs['body'][clear_key])
-        self.assertTrue(update_mock().execute.called)
-
-    def test_arvados_node_not_cleaned_after_shutdown_cancelled(self, *mocks):
-        if len(mocks) == 1:
-            mocks[0].return_value = "idle\n"
-        cloud_node = testutil.cloud_node_mock(61)
-        arv_node = testutil.arvados_node_mock(61)
-        self.make_mocks(cloud_node, arv_node, shutdown_open=False)
-        self.cloud_client.destroy_node.return_value = False
-        self.make_actor(cancellable=True)
-        self.shutdown_actor.cancel_shutdown("test")
-        self.shutdown_actor.ping().get(self.TIMEOUT)
-        self.check_success_flag(False, 2)
-        self.assertFalse(self.arvados_client.nodes().update.called)
-
-
-class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
-                                       unittest.TestCase):
-    ACTOR_CLASS = dispatch.ComputeNodeShutdownActor
-
-    def test_easy_shutdown(self):
-        self.make_actor(start_time=0)
-        self.check_success_flag(True)
-        self.assertTrue(self.cloud_client.destroy_node.called)
-
-    def test_shutdown_cancelled_when_destroy_node_fails(self):
-        self.make_mocks(node_broken=True)
-        self.cloud_client.destroy_node.return_value = False
-        self.make_actor(start_time=0)
-        self.check_success_flag(False, 2)
-        self.assertEqual(1, self.cloud_client.destroy_node.call_count)
-        self.assertEqual(self.ACTOR_CLASS.DESTROY_FAILED,
-                         self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
-
-    def test_late_subscribe(self):
-        self.make_actor()
-        subscriber = mock.Mock(name='subscriber_mock')
-        self.shutdown_actor.subscribe(subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.shutdown_actor)
-        self.assertTrue(subscriber.called)
-        self.assertEqual(self.shutdown_actor.actor_ref.actor_urn,
-                         subscriber.call_args[0][0].actor_ref.actor_urn)
-
-
-class ComputeNodeUpdateActorTestCase(testutil.ActorTestMixin,
-                                     unittest.TestCase):
-    ACTOR_CLASS = dispatch.ComputeNodeUpdateActor
-
-    def make_actor(self):
-        self.driver = mock.MagicMock(name='driver_mock')
-        self.timer = mock.MagicMock(name='timer_mock')
-        self.updater = self.ACTOR_CLASS.start(self.driver, self.timer).proxy()
-
-    def test_node_sync(self, *args):
-        self.make_actor()
-        cloud_node = testutil.cloud_node_mock()
-        arv_node = testutil.arvados_node_mock()
-        self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
-        self.driver().sync_node.assert_called_with(cloud_node, arv_node)
-
-    @testutil.no_sleep
-    def test_node_sync_error(self, *args):
-        self.make_actor()
-        cloud_node = testutil.cloud_node_mock()
-        arv_node = testutil.arvados_node_mock()
-        self.driver().sync_node.side_effect = (IOError, Exception, True)
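-        # Successive sync calls: raise IOError, raise Exception, then succeed.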
-        self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
-        self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
-        self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
-        self.driver().sync_node.assert_called_with(cloud_node, arv_node)
-
-class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
-                                      unittest.TestCase):
-    def make_mocks(self, node_num):
-        self.shutdowns = testutil.MockShutdownTimer()
-        self.shutdowns._set_state(False, 300)
-        self.timer = mock.MagicMock(name='timer_mock')
-        self.updates = mock.MagicMock(name='update_mock')
-        self.cloud_mock = testutil.cloud_node_mock(node_num)
-        self.subscriber = mock.Mock(name='subscriber_mock')
-        self.cloud_client = mock.MagicMock(name='cloud_client')
-        self.cloud_client.broken.return_value = False
-
-    def make_actor(self, node_num=1, arv_node=None, start_time=None):
-        if not hasattr(self, 'cloud_mock'):
-            self.make_mocks(node_num)
-        if start_time is None:
-            start_time = time.time()
-        self.node_actor = dispatch.ComputeNodeMonitorActor.start(
-            self.cloud_mock, start_time, self.shutdowns,
-            self.timer, self.updates, self.cloud_client,
-            arv_node, boot_fail_after=300).proxy()
-        self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
-
-    def node_state(self, *states):
-        return self.node_actor.in_state(*states).get(self.TIMEOUT)
-
-    def test_in_state_when_unpaired(self):
-        self.make_actor()
-        self.assertTrue(self.node_state('unpaired'))
-
-    def test_in_state_when_pairing_stale(self):
-        self.make_actor(arv_node=testutil.arvados_node_mock(
-                job_uuid=None, age=90000))
-        self.assertTrue(self.node_state('down'))
-
-    def test_in_state_when_no_state_available(self):
-        self.make_actor(arv_node=testutil.arvados_node_mock(
-                crunch_worker_state=None))
-        self.assertTrue(self.node_state('idle'))
-
-    def test_in_state_when_no_state_available_old(self):
-        self.make_actor(arv_node=testutil.arvados_node_mock(
-                crunch_worker_state=None, age=90000))
-        self.assertTrue(self.node_state('down'))
-
-    def test_in_idle_state(self):
-        idle_nodes_before = status.tracker._idle_nodes.keys()
-        self.make_actor(2, arv_node=testutil.arvados_node_mock(job_uuid=None))
-        self.assertTrue(self.node_state('idle'))
-        self.assertFalse(self.node_state('busy'))
-        self.assertTrue(self.node_state('idle', 'busy'))
-        idle_nodes_after = status.tracker._idle_nodes.keys()
-        new_idle_nodes = [n for n in idle_nodes_after if n not in idle_nodes_before]
-        # There should be 1 additional idle node
-        self.assertEqual(1, len(new_idle_nodes))
-
-    def test_in_busy_state(self):
-        idle_nodes_before = status.tracker._idle_nodes.keys()
-        self.make_actor(3, arv_node=testutil.arvados_node_mock(job_uuid=True))
-        self.assertFalse(self.node_state('idle'))
-        self.assertTrue(self.node_state('busy'))
-        self.assertTrue(self.node_state('idle', 'busy'))
-        idle_nodes_after = status.tracker._idle_nodes.keys()
-        new_idle_nodes = [n for n in idle_nodes_after if n not in idle_nodes_before]
-        # There shouldn't be any additional idle node
-        self.assertEqual(0, len(new_idle_nodes))
-
-    def test_init_shutdown_scheduling(self):
-        self.make_actor()
-        self.assertTrue(self.timer.schedule.called)
-        self.assertEqual(300, self.timer.schedule.call_args[0][0])
-
-    def test_shutdown_window_close_scheduling(self):
-        self.make_actor()
-        self.shutdowns._set_state(False, 600)
-        self.timer.schedule.reset_mock()
-        self.node_actor.consider_shutdown().get(self.TIMEOUT)
-        self.stop_proxy(self.node_actor)
-        self.assertTrue(self.timer.schedule.called)
-        self.assertEqual(600, self.timer.schedule.call_args[0][0])
-        self.assertFalse(self.subscriber.called)
-
-    def test_shutdown_subscription(self):
-        self.make_actor(start_time=0)
-        self.shutdowns._set_state(True, 600)
-        self.node_actor.consider_shutdown().get(self.TIMEOUT)
-        self.assertTrue(self.subscriber.called)
-        self.assertEqual(self.node_actor.actor_ref.actor_urn,
-                         self.subscriber.call_args[0][0].actor_ref.actor_urn)
-
-    def test_no_shutdown_booting(self):
-        self.make_actor()
-        self.shutdowns._set_state(True, 600)
-        self.assertEqual(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
-                         (False, "node state is ('unpaired', 'open', 'boot wait', 'not idle')"))
-
-    def test_shutdown_when_invalid_cloud_node_size(self):
-        self.make_mocks(1)
-        self.cloud_mock.size.id = 'invalid'
-        self.cloud_mock.extra['arvados_node_size'] = 'stale.type'
-        self.make_actor()
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node's size tag 'stale.type' not recognizable"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_shutdown_without_arvados_node(self):
-        self.make_actor(start_time=0)
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('down', 'open', 'boot exceeded', 'not idle')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_shutdown_missing(self):
-        arv_node = testutil.arvados_node_mock(10, job_uuid=None,
-                                              crunch_worker_state="down",
-                                              last_ping_at='1970-01-01T01:02:03.04050607Z')
-        self.make_actor(10, arv_node)
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'not idle')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_shutdown_running_broken(self):
-        arv_node = testutil.arvados_node_mock(12, job_uuid=None,
-                                              crunch_worker_state="down")
-        self.make_actor(12, arv_node)
-        self.shutdowns._set_state(True, 600)
-        self.cloud_client.broken.return_value = True
-        self.assertEquals((True, "node state is ('down', 'open', 'boot wait', 'not idle')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_shutdown_missing_broken(self):
-        arv_node = testutil.arvados_node_mock(11, job_uuid=None,
-                                              crunch_worker_state="down",
-                                              last_ping_at='1970-01-01T01:02:03.04050607Z')
-        self.make_actor(11, arv_node)
-        self.shutdowns._set_state(True, 600)
-        self.cloud_client.broken.return_value = True
-        self.assertEqual(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'not idle')"))
-
-    def test_no_shutdown_when_window_closed(self):
-        self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
-        self.assertEquals((False, "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_no_shutdown_when_node_running_job(self):
-        self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True))
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((False, "node state is ('busy', 'open', 'boot wait', 'not idle')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_shutdown_when_node_state_unknown(self):
-        self.make_actor(5, testutil.arvados_node_mock(
-            5, crunch_worker_state=None))
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('idle', 'open', 'boot wait', 'idle exceeded')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_shutdown_when_node_state_fail(self):
-        self.make_actor(5, testutil.arvados_node_mock(
-            5, crunch_worker_state='fail'))
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((True, "node state is ('fail', 'open', 'boot wait', 'not idle')"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_no_shutdown_when_node_state_stale(self):
-        self.make_actor(6, testutil.arvados_node_mock(6, age=90000))
-        self.shutdowns._set_state(True, 600)
-        self.assertEquals((False, "node state is stale"),
-                          self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    def test_arvados_node_match(self):
-        self.make_actor(2)
-        arv_node = testutil.arvados_node_mock(
-            2, hostname='compute-two.zzzzz.arvadosapi.com')
-        self.cloud_client.node_id.return_value = '2'
-        pair_id = self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT)
-        self.assertEqual(self.cloud_mock.id, pair_id)
-        self.stop_proxy(self.node_actor)
-        self.updates.sync_node.assert_called_with(self.cloud_mock, arv_node)
-
-    def test_arvados_node_mismatch(self):
-        self.make_actor(3)
-        arv_node = testutil.arvados_node_mock(1)
-        self.assertIsNone(
-            self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
-
-    def test_arvados_node_mismatch_first_ping_too_early(self):
-        self.make_actor(4)
-        arv_node = testutil.arvados_node_mock(
-            4, first_ping_at='1971-03-02T14:15:16.1717282Z')
-        self.assertIsNone(
-            self.node_actor.offer_arvados_pair(arv_node).get(self.TIMEOUT))
-
-    def test_update_cloud_node(self):
-        self.make_actor(1)
-        self.make_mocks(2)
-        self.cloud_mock.id = '1'
-        self.node_actor.update_cloud_node(self.cloud_mock)
-        current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
-        self.assertEqual([testutil.ip_address_mock(2)],
-                         current_cloud.private_ips)
-
-    def test_missing_cloud_node_update(self):
-        self.make_actor(1)
-        self.node_actor.update_cloud_node(None)
-        current_cloud = self.node_actor.cloud_node.get(self.TIMEOUT)
-        self.assertEqual([testutil.ip_address_mock(1)],
-                         current_cloud.private_ips)
-
-    def test_update_arvados_node(self):
-        self.make_actor(3)
-        job_uuid = 'zzzzz-jjjjj-updatejobnode00'
-        new_arvados = testutil.arvados_node_mock(3, job_uuid)
-        self.node_actor.update_arvados_node(new_arvados)
-        current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
-        self.assertEqual(job_uuid, current_arvados['job_uuid'])
-
-    def test_missing_arvados_node_update(self):
-        self.make_actor(4, testutil.arvados_node_mock(4))
-        self.node_actor.update_arvados_node(None)
-        current_arvados = self.node_actor.arvados_node.get(self.TIMEOUT)
-        self.assertEqual(testutil.ip_address_mock(4),
-                         current_arvados['ip_address'])
-
-    def test_update_arvados_node_calls_sync_node(self):
-        self.make_mocks(5)
-        self.cloud_mock.extra['testname'] = 'cloudfqdn.zzzzz.arvadosapi.com'
-        self.make_actor()
-        arv_node = testutil.arvados_node_mock(5)
-        self.node_actor.update_arvados_node(arv_node).get(self.TIMEOUT)
-        self.assertEqual(1, self.updates.sync_node.call_count)
diff --git a/services/nodemanager/tests/test_computenode_dispatch_slurm.py b/services/nodemanager/tests/test_computenode_dispatch_slurm.py
deleted file mode 100644 (file)
index 02d8fb6..0000000
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import subprocess32 as subprocess
-import time
-import unittest
-
-import mock
-
-import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
-from . import testutil
-from .test_computenode_dispatch import \
-    ComputeNodeShutdownActorMixin, \
-    ComputeNodeSetupActorTestCase, \
-    ComputeNodeUpdateActorTestCase
-
-@mock.patch('subprocess32.check_output')
-class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
-                                            unittest.TestCase):
-    ACTOR_CLASS = slurm_dispatch.ComputeNodeShutdownActor
-
-    def check_slurm_got_args(self, proc_mock, *args):
-        self.assertTrue(proc_mock.called)
-        slurm_cmd = proc_mock.call_args[0][0]
-        for s in args:
-            self.assertIn(s, slurm_cmd)
-
-    def check_success_after_reset(self, proc_mock, end_state='drain\n', timer=False):
-        self.make_mocks(arvados_node=testutil.arvados_node_mock(63))
-        if not timer:
-            self.timer = testutil.MockTimer(False)
-        self.make_actor()
-        self.check_success_flag(None, 0)
-        # At this point, 1st try should have happened.
-
-        self.timer.deliver()
-        self.check_success_flag(None, 0)
-        # At this point, 2nd try should have happened.
-
-        # Order is critical here: if the mock gets called when no return value
-        # or side effect is set, we may invoke a real subprocess.
-        proc_mock.return_value = end_state
-        proc_mock.side_effect = None
-
-        # 3rd try
-        self.timer.deliver()
-
-        self.check_success_flag(True, 3)
-        self.check_slurm_got_args(proc_mock, 'NodeName=compute63')
-
-    def make_wait_state_test(start_state='drng\n', end_state='drain\n'):
-        def test(self, proc_mock):
-            proc_mock.return_value = start_state
-            self.check_success_after_reset(proc_mock, end_state)
-        return test
-
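-    # Generate test_wait_while_*/test_wait_until_* methods, one per SLURM node state.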
-    for wait_state in ['alloc\n', 'drng\n']:
-        locals()['test_wait_while_' + wait_state.strip()
-                 ] = make_wait_state_test(start_state=wait_state)
-
-    for end_state in ['idle*\n', 'down\n', 'down*\n', 'drain\n', 'fail\n']:
-        locals()['test_wait_until_' + end_state.strip()
-                 ] = make_wait_state_test(end_state=end_state)
-
-    def test_retry_failed_slurm_calls(self, proc_mock):
-        proc_mock.side_effect = subprocess.CalledProcessError(1, ["mock"])
-        self.check_success_after_reset(proc_mock)
-
-    def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
-        # Test we correctly handle a node that failed to bootstrap.
-        proc_mock.return_value = 'down\n'
-        self.make_actor(start_time=0)
-        self.check_success_flag(True)
-        self.assertFalse(proc_mock.called)
-
-    def test_node_resumed_when_shutdown_cancelled(self, proc_mock):
-        try:
-            proc_mock.side_effect = iter(['', 'drng\n', 'drng\n', ''])
-            self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
-            self.timer = testutil.MockTimer(False)
-            self.make_actor()
-            self.busywait(lambda: proc_mock.call_args is not None)
-            self.shutdown_actor.cancel_shutdown("test")
-            self.check_success_flag(False, 2)
-            self.assertEqual(proc_mock.call_args_list[0], mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']))
-            self.assertEqual(proc_mock.call_args_list[-1], mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=RESUME']))
-
-        finally:
-            self.shutdown_actor.actor_ref.stop()
-
-    def test_cancel_shutdown_retry(self, proc_mock):
-        proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n', 'idle\n'])
-        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
-        self.make_actor()
-        self.check_success_flag(False, 5)
-
-    def test_issue_slurm_drain_retry(self, proc_mock):
-        proc_mock.side_effect = iter([OSError, OSError, 'drng\n', 'drain\n'])
-        self.check_success_after_reset(proc_mock, timer=False)
-
-    def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
-        proc_mock.return_value = 'drain\n'
-        super(SLURMComputeNodeShutdownActorTestCase,
-              self).test_arvados_node_cleaned_after_shutdown()
-
-    def test_cancellable_shutdown(self, proc_mock):
-        proc_mock.return_value = 'other\n'
-        super(SLURMComputeNodeShutdownActorTestCase,
-              self).test_cancellable_shutdown()
-
-    def test_uncancellable_shutdown(self, proc_mock):
-        proc_mock.return_value = 'other\n'
-        super(SLURMComputeNodeShutdownActorTestCase,
-              self).test_uncancellable_shutdown()
-
-@mock.patch('subprocess32.check_output')
-class SLURMComputeNodeUpdateActorTestCase(ComputeNodeUpdateActorTestCase):
-    ACTOR_CLASS = slurm_dispatch.ComputeNodeUpdateActor
-
-    def test_update_node_weight(self, check_output):
-        self.make_actor()
-        cloud_node = testutil.cloud_node_mock()
-        arv_node = testutil.arvados_node_mock()
-        self.updater.sync_node(cloud_node, arv_node).get(self.TIMEOUT)
-        check_output.assert_called_with(['scontrol', 'update', 'NodeName=compute99', 'Weight=99000', 'Features=instancetype=z99.test'])
-
-class SLURMComputeNodeSetupActorTestCase(ComputeNodeSetupActorTestCase):
-    ACTOR_CLASS = slurm_dispatch.ComputeNodeSetupActor
-
-    @mock.patch('subprocess32.check_output')
-    def test_update_node_features(self, check_output):
-        # `scontrol update` happens only if the Arvados node record
-        # has a hostname. ComputeNodeSetupActorTestCase.make_mocks
-        # uses mocks with scrubbed hostnames, so we override with the
-        # default testutil.arvados_node_mock.
-        self.make_mocks(arvados_effect=[testutil.arvados_node_mock()])
-        self.make_actor()
-        self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        check_output.assert_called_with(['scontrol', 'update', 'NodeName=compute99', 'Weight=1000', 'Features=instancetype=z1.test'])
-
-    @mock.patch('subprocess32.check_output')
-    def test_failed_arvados_calls_retried(self, check_output):
-        super(SLURMComputeNodeSetupActorTestCase, self).test_failed_arvados_calls_retried()
-
-    @mock.patch('subprocess32.check_output')
-    def test_subscribe(self, check_output):
-        super(SLURMComputeNodeSetupActorTestCase, self).test_subscribe()
-
-    @mock.patch('subprocess32.check_output')
-    def test_creation_with_arvados_node(self, check_output):
-        super(SLURMComputeNodeSetupActorTestCase, self).test_creation_with_arvados_node()
diff --git a/services/nodemanager/tests/test_computenode_driver.py b/services/nodemanager/tests/test_computenode_driver.py
deleted file mode 100644 (file)
index 4bf4c39..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import unittest
-
-import libcloud.common.types as cloud_types
-import mock
-
-import arvnodeman.computenode.driver as driver_base
-import arvnodeman.status as status
-import arvnodeman.config as config
-from . import testutil
-
-class ComputeNodeDriverTestCase(unittest.TestCase):
-    def setUp(self):
-        self.driver_mock = mock.MagicMock(name='driver_mock')
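-        # Reset the class-level cache so search_for() results don't leak between tests.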
-        driver_base.BaseComputeNodeDriver.SEARCH_CACHE = {}
-
-    def test_search_for_now_uses_public_method(self):
-        image = testutil.cloud_object_mock(1)
-        self.driver_mock().list_images.return_value = [image]
-        driver = driver_base.BaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        self.assertIs(image, driver.search_for_now('id_1', 'list_images'))
-        self.assertEqual(1, self.driver_mock().list_images.call_count)
-
-    def test_search_for_now_uses_private_method(self):
-        net = testutil.cloud_object_mock(1)
-        self.driver_mock().ex_list_networks.return_value = [net]
-        driver = driver_base.BaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        self.assertIs(net, driver.search_for_now('id_1', 'ex_list_networks'))
-        self.assertEqual(1, self.driver_mock().ex_list_networks.call_count)
-
-    def test_search_for_now_raises_ValueError_on_zero_results(self):
-        self.driver_mock().list_images.return_value = []
-        driver = driver_base.BaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        with self.assertRaises(ValueError):
-            driver.search_for_now('id_1', 'list_images')
-
-    def test_search_for_now_raises_ValueError_on_extra_results(self):
-        image = testutil.cloud_object_mock(1)
-        self.driver_mock().list_images.return_value = [image, image]
-        driver = driver_base.BaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        with self.assertRaises(ValueError):
-            driver.search_for_now('id_1', 'list_images')
-
-    def test_search_for_now_does_not_cache_results(self):
-        image1 = testutil.cloud_object_mock(1)
-        image2 = testutil.cloud_object_mock(1)
-        self.driver_mock().list_images.side_effect = [[image1], [image2]]
-        driver = driver_base.BaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        self.assertIsNot(driver.search_for_now('id_1', 'list_images'),
-                         driver.search_for_now('id_1', 'list_images'))
-        self.assertEqual(2, self.driver_mock().list_images.call_count)
-
-    def test_search_for_returns_cached_results(self):
-        image1 = testutil.cloud_object_mock(1)
-        image2 = testutil.cloud_object_mock(1)
-        self.driver_mock().list_images.side_effect = [[image1], [image2]]
-        driver = driver_base.BaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        self.assertIs(driver.search_for('id_1', 'list_images'),
-                      driver.search_for('id_1', 'list_images'))
-        self.assertEqual(1, self.driver_mock().list_images.call_count)
-
-
-    class TestBaseComputeNodeDriver(driver_base.BaseComputeNodeDriver):
-        def arvados_create_kwargs(self, size, arvados_node):
-            return {'name': arvados_node}
-
-
-    def test_create_node_only_cloud_errors_are_counted(self):
-        status.tracker.update({'create_node_errors': 0})
-        errors = [(config.CLOUD_ERRORS[0], True), (KeyError, False)]
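-        # Pairs of (exception, is_cloud_error): only cloud errors should increment the tracker.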
-        self.driver_mock().list_images.return_value = []
-        driver = self.TestBaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        error_count = 0
-        for an_error, is_cloud_error in errors:
-            self.driver_mock().create_node.side_effect = an_error
-            with self.assertRaises(an_error):
-                driver.create_node(testutil.MockSize(1), 'id_1')
-            if is_cloud_error:
-                error_count += 1
-            self.assertEqual(error_count, status.tracker.get('create_node_errors'))
-
-    def test_list_nodes_only_cloud_errors_are_counted(self):
-        status.tracker.update({'list_nodes_errors': 0})
-        errors = [(config.CLOUD_ERRORS[0], True), (KeyError, False)]
-        driver = self.TestBaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        error_count = 0
-        for an_error, is_cloud_error in errors:
-            self.driver_mock().list_nodes.side_effect = an_error
-            with self.assertRaises(an_error):
-                driver.list_nodes()
-            if is_cloud_error:
-                error_count += 1
-            self.assertEqual(error_count, status.tracker.get('list_nodes_errors'))
-
-    def test_destroy_node_only_cloud_errors_are_counted(self):
-        status.tracker.update({'destroy_node_errors': 0})
-        errors = [(config.CLOUD_ERRORS[0], True), (KeyError, False)]
-        self.driver_mock().list_nodes.return_value = [testutil.MockSize(1)]
-        driver = self.TestBaseComputeNodeDriver({}, {}, {}, self.driver_mock)
-        error_count = 0
-        for an_error, is_cloud_error in errors:
-            self.driver_mock().destroy_node.side_effect = an_error
-            with self.assertRaises(an_error):
-                driver.destroy_node(testutil.MockSize(1))
-            if is_cloud_error:
-                error_count += 1
-            self.assertEqual(error_count, status.tracker.get('destroy_node_errors'))
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
deleted file mode 100644 (file)
index ea7a033..0000000
+++ /dev/null
@@ -1,147 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import ssl
-import time
-import unittest
-
-import libcloud.common.types as cloud_types
-import mock
-
-import arvnodeman.computenode.driver.azure as azure
-from . import testutil
-
-class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
-    TEST_CLASS = azure.ComputeNodeDriver
-
-    def new_driver(self, auth_kwargs={}, list_kwargs=None, create_kwargs={}):
-        # Copy before mutating so the shared default dict is never modified.
-        list_kwargs = dict(list_kwargs or {})
-        list_kwargs.setdefault("ex_resource_group", "TestResourceGroup")
-        return super(AzureComputeNodeDriverTestCase, self).new_driver(auth_kwargs, list_kwargs, create_kwargs)
-
-    def test_driver_instantiation(self):
-        kwargs = {'key': 'testkey'}
-        driver = self.new_driver(auth_kwargs=kwargs)
-        self.assertTrue(self.driver_mock.called)
-        self.assertEqual(kwargs, self.driver_mock.call_args[1])
-
-    def test_create_image_loaded_at_initialization(self):
-        get_method = self.driver_mock().get_image
-        get_method.return_value = testutil.cloud_object_mock('id_b')
-        driver = self.new_driver(create_kwargs={'image': 'id_b'})
-        self.assertEqual(1, get_method.call_count)
-
-    def test_create_includes_ping(self):
-        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
-        arv_node["hostname"] = None
-        driver = self.new_driver()
-        driver.create_node(testutil.MockSize(1), arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertIn('ping_secret=ssshh',
-                      create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
-
-    def test_create_includes_arvados_node_size(self):
-        arv_node = testutil.arvados_node_mock()
-        arv_node["hostname"] = None
-        size = testutil.MockSize(1)
-        driver = self.new_driver()
-        driver.create_node(size, arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertIn(
-            ('arvados_node_size', size.id),
-            create_method.call_args[1].get('ex_tags', {'tags': 'missing'}).items()
-        )
-
-    def test_name_from_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(hostname=None)
-        driver = self.new_driver()
-        self.assertEqual('compute-000000000000063-zzzzz',
-                         driver.arvados_create_kwargs(testutil.MockSize(1), arv_node)['name'])
-
-    def check_node_tagged(self, cloud_node, expected_tags):
-        tag_mock = self.driver_mock().ex_create_tags
-        self.assertTrue(tag_mock.called)
-        self.assertIs(cloud_node, tag_mock.call_args[0][0])
-        self.assertEqual(expected_tags, tag_mock.call_args[0][1])
-
-    def test_node_create_time(self):
-        refsecs = int(time.time())
-        reftuple = time.gmtime(refsecs)
-        node = testutil.cloud_node_mock()
-        node.extra = {'tags': {'booted_at': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-                                                   reftuple)}}
-        self.assertEqual(refsecs, azure.ComputeNodeDriver.node_start_time(node))
-
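# A minimal sketch of the 'booted_at' round-trip the test above checks,
# assuming the '%Y-%m-%dT%H:%M:%S.000Z' format it uses; this is a
# reconstruction for illustration, not the driver's node_start_time code.
import calendar, time

def parse_booted_at(stamp):
    # Drop the fractional-seconds suffix, then parse as a UTC epoch time.
    return calendar.timegm(
        time.strptime(stamp.split('.')[0], '%Y-%m-%dT%H:%M:%S'))

parse_booted_at('1970-01-01T00:01:05.000Z')  # -> 65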
-    def test_node_fqdn(self):
-        name = 'fqdntest.zzzzz.arvadosapi.com'
-        node = testutil.cloud_node_mock()
-        node.extra = {'tags': {"hostname": name}}
-        self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
-
-    def test_sync_node(self):
-        arv_node = testutil.arvados_node_mock(1)
-        cloud_node = testutil.cloud_node_mock(2)
-        driver = self.new_driver()
-        driver.sync_node(cloud_node, arv_node)
-        self.check_node_tagged(cloud_node,
-                               {'hostname': 'compute1.zzzzz.arvadosapi.com'})
-
-    def test_custom_data(self):
-        arv_node = testutil.arvados_node_mock(hostname=None)
-        driver = self.new_driver()
-        self.assertEqual("""#!/bin/sh
-mkdir -p    /var/tmp/arv-node-data/meta-data
-echo 'https://100::/arvados/v1/nodes/zzzzz-yyyyy-000000000000063/ping?ping_secret=defaulttestsecret' > /var/tmp/arv-node-data/arv-ping-url
-echo compute-000000000000063-zzzzz > /var/tmp/arv-node-data/meta-data/instance-id
-echo z1.test > /var/tmp/arv-node-data/meta-data/instance-type
-""",
-                         driver.arvados_create_kwargs(testutil.MockSize(1), arv_node)['ex_customdata'])
-
-    def test_list_nodes_ignores_nodes_without_tags(self):
-        driver = self.new_driver(create_kwargs={"tag_arvados-class": "dynamic-compute"})
-        # Mock cloud node without tags
-        nodelist = [testutil.cloud_node_mock(1)]
-        self.driver_mock().list_nodes.return_value = nodelist
-        n = driver.list_nodes()
-        self.assertEqual([], n)
-
-    def test_create_raises_but_actually_succeeded(self):
-        arv_node = testutil.arvados_node_mock(1, hostname=None)
-        driver = self.new_driver(create_kwargs={"tag_arvados-class": "dynamic-compute"})
-        nodelist = [testutil.cloud_node_mock(1, tags={"arvados-class": "dynamic-compute"})]
-        nodelist[0].name = 'compute-000000000000001-zzzzz'
-        self.driver_mock().list_nodes.return_value = nodelist
-        self.driver_mock().create_node.side_effect = IOError
-        n = driver.create_node(testutil.MockSize(1), arv_node)
-        self.assertEqual('compute-000000000000001-zzzzz', n.name)
-
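# A hedged sketch of the recovery path this test exercises: create_node()
# raised, but the instance actually came up, so the driver finds it by the
# name it was about to assign. All names here are illustrative, not the
# driver's actual API.
def create_with_recovery(create, list_nodes, name):
    try:
        return create(name=name)
    except Exception:
        for node in list_nodes():
            if node.name == name:
                return node  # the create failed late; the node exists anyway
        raise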
-    def test_ex_fetch_nic_false(self):
-        arv_node = testutil.arvados_node_mock(1, hostname=None)
-        driver = self.new_driver(create_kwargs={"tag_arvados-class": "dynamic-compute"})
-        nodelist = [testutil.cloud_node_mock(1, tags={"arvados-class": "dynamic-compute"})]
-        nodelist[0].name = 'compute-000000000000001-zzzzz'
-        self.driver_mock().list_nodes.return_value = nodelist
-        n = driver.list_nodes()
-        self.assertEqual(nodelist, n)
-        self.driver_mock().list_nodes.assert_called_with(ex_fetch_nic=False, ex_fetch_power_state=False, ex_resource_group='TestResourceGroup')
-
-    def test_create_can_find_node_after_timeout(self):
-        super(AzureComputeNodeDriverTestCase,
-              self).test_create_can_find_node_after_timeout(
-                  create_kwargs={'tag_arvados-class': 'test'},
-                  node_extra={'tags': {'arvados-class': 'test'}})
-
-    def test_node_found_after_timeout_has_fixed_size(self):
-        size = testutil.MockSize(4)
-        node_props = {'hardwareProfile': {'vmSize': size.id}}
-        cloud_node = testutil.cloud_node_mock(tags={'arvados-class': 'test'}, properties=node_props)
-        cloud_node.size = None
-        self.check_node_found_after_timeout_has_fixed_size(
-            size, cloud_node, {'tag_arvados-class': 'test'})
diff --git a/services/nodemanager/tests/test_computenode_driver_ec2.py b/services/nodemanager/tests/test_computenode_driver_ec2.py
deleted file mode 100644 (file)
index 520c0dc..0000000
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import ssl
-import time
-import unittest
-
-import libcloud.common.types as cloud_types
-import mock
-
-import arvnodeman.computenode.driver.ec2 as ec2
-from . import testutil
-
-class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
-    TEST_CLASS = ec2.ComputeNodeDriver
-
-    def test_driver_instantiation(self):
-        kwargs = {'key': 'testkey'}
-        driver = self.new_driver(auth_kwargs=kwargs)
-        self.assertTrue(self.driver_mock.called)
-        self.assertEqual(kwargs, self.driver_mock.call_args[1])
-
-    def test_list_kwargs_become_filters(self):
-        # We're also testing tag name translation.
-        driver = self.new_driver(list_kwargs={'tag_test': 'true'})
-        driver.list_nodes()
-        list_method = self.driver_mock().list_nodes
-        self.assertTrue(list_method.called)
-        self.assertEqual({'tag:test': 'true'},
-                          list_method.call_args[1].get('ex_filters'))
-
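# A stand-alone restatement of the tag-name translation asserted above:
# 'tag_*' keys from [Cloud List] become EC2 'tag:*' filters (a sketch
# reconstructed from the assertion, not the driver source).
list_kwargs = {'tag_test': 'true'}
ex_filters = dict((k.replace('tag_', 'tag:', 1), v)
                  for k, v in list_kwargs.items())
assert ex_filters == {'tag:test': 'true'}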
-    def test_create_image_loaded_at_initialization(self):
-        list_method = self.driver_mock().list_images
-        list_method.return_value = [testutil.cloud_object_mock(c)
-                                    for c in 'abc']
-        driver = self.new_driver(create_kwargs={'image_id': 'id_b'})
-        self.assertEqual(1, list_method.call_count)
-
-    def test_create_includes_ping_secret(self):
-        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
-        driver = self.new_driver()
-        driver.create_node(testutil.MockSize(1), arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertIn('ping_secret=ssshh',
-                      create_method.call_args[1].get('ex_userdata',
-                                                     'arg missing'))
-
-    def test_create_includes_metadata(self):
-        arv_node = testutil.arvados_node_mock()
-        driver = self.new_driver(list_kwargs={'tag_test': 'testvalue'})
-        driver.create_node(testutil.MockSize(1), arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertIn(
-            ('test', 'testvalue'),
-            create_method.call_args[1].get('ex_metadata', {'arg': 'missing'}).items()
-        )
-
-    def test_create_includes_arvados_node_size(self):
-        arv_node = testutil.arvados_node_mock()
-        size = testutil.MockSize(1)
-        driver = self.new_driver()
-        driver.create_node(size, arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertIn(
-            ('arvados_node_size', size.id),
-            create_method.call_args[1].get('ex_metadata', {'arg': 'missing'}).items()
-        )
-
-    def test_create_preemptible_instance(self):
-        arv_node = testutil.arvados_node_mock()
-        driver = self.new_driver()
-        driver.create_node(testutil.MockSize(1, preemptible=True), arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertEqual(
-            True,
-            create_method.call_args[1].get('ex_spot_market', 'arg missing')
-        )
-
-    def test_hostname_from_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(8)
-        driver = self.new_driver()
-        self.assertEqual('compute8.zzzzz.arvadosapi.com',
-                         driver.arvados_create_kwargs(testutil.MockSize(1), arv_node)['name'])
-
-    def test_default_hostname_from_new_arvados_node(self):
-        arv_node = testutil.arvados_node_mock(hostname=None)
-        driver = self.new_driver()
-        self.assertEqual('dynamic.compute.zzzzz.arvadosapi.com',
-                         driver.arvados_create_kwargs(testutil.MockSize(1), arv_node)['name'])
-
-    def check_node_tagged(self, cloud_node, expected_tags):
-        tag_mock = self.driver_mock().ex_create_tags
-        self.assertTrue(tag_mock.called)
-        self.assertIs(cloud_node, tag_mock.call_args[0][0])
-        self.assertEqual(expected_tags, tag_mock.call_args[0][1])
-
-    def test_sync_node(self):
-        arv_node = testutil.arvados_node_mock(1)
-        cloud_node = testutil.cloud_node_mock(2)
-        driver = self.new_driver()
-        driver.sync_node(cloud_node, arv_node)
-        self.check_node_tagged(cloud_node,
-                               {'Name': 'compute1.zzzzz.arvadosapi.com'})
-
-    def test_node_create_time(self):
-        refsecs = int(time.time())
-        reftuple = time.gmtime(refsecs)
-        node = testutil.cloud_node_mock()
-        node.extra = {'launch_time': time.strftime('%Y-%m-%dT%H:%M:%S.000Z',
-                                                   reftuple)}
-        self.assertEqual(refsecs, ec2.ComputeNodeDriver.node_start_time(node))
-
-    def test_node_fqdn(self):
-        name = 'fqdntest.zzzzz.arvadosapi.com'
-        node = testutil.cloud_node_mock()
-        node.name = name
-        self.assertEqual(name, ec2.ComputeNodeDriver.node_fqdn(node))
-
-    def test_create_ebs_volume(self):
-        arv_node = testutil.arvados_node_mock()
-        driver = self.new_driver()
-        # libcloud/ec2 "disk" sizes are in GB, Arvados/SLURM "scratch" value is in MB
-        size = testutil.MockSize(1)
-        size.disk=5
-        size.scratch=20000
-        driver.create_node(size, arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertEqual([{
-            "DeviceName": "/dev/xvdt",
-            "Ebs": {
-                "DeleteOnTermination": True,
-                "VolumeSize": 16,
-                "VolumeType": "gp2"
-            }}],
-                         create_method.call_args[1].get('ex_blockdevicemappings'))
-
-    def test_ebs_volume_not_needed(self):
-        arv_node = testutil.arvados_node_mock()
-        driver = self.new_driver()
-        # libcloud/ec2 "disk" sizes are in GB, Arvados/SLURM "scratch" value is in MB
-        size = testutil.MockSize(1)
-        size.disk=80
-        size.scratch=20000
-        driver.create_node(size, arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertIsNone(create_method.call_args[1].get('ex_blockdevicemappings'))
-
-    def test_ebs_volume_too_big(self):
-        arv_node = testutil.arvados_node_mock()
-        driver = self.new_driver()
-        # libcloud/ec2 "disk" sizes are in GB, Arvados/SLURM "scratch" value is in MB
-        size = testutil.MockSize(1)
-        size.disk=80
-        size.scratch=20000000
-        driver.create_node(size, arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertTrue(create_method.called)
-        self.assertEqual([{
-            "DeviceName": "/dev/xvdt",
-            "Ebs": {
-                "DeleteOnTermination": True,
-                "VolumeSize": 16384,
-                "VolumeType": "gp2"
-            }}],
-                         create_method.call_args[1].get('ex_blockdevicemappings'))
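# The three EBS tests above pin down the sizing rule without showing the
# driver. One formula consistent with all three expectations, reconstructed
# from the test data alone (treating the MB scratch figure as MiB when
# converting to the GB units the EC2 API takes); the real driver code may
# differ in detail.
import math

EBS_MAX_GB = 16384  # EC2's per-volume ceiling, matching the capped case

def ebs_volume_gb(scratch_mb, disk_gb):
    if scratch_mb <= disk_gb * 1000:
        return None  # instance-store scratch already suffices
    needed = scratch_mb - disk_gb * 1000
    return min(int(math.ceil(needed * 2**20 / 1e9)), EBS_MAX_GB)

assert ebs_volume_gb(20000, 5) == 16
assert ebs_volume_gb(20000, 80) is None
assert ebs_volume_gb(20000000, 80) == 16384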
diff --git a/services/nodemanager/tests/test_computenode_driver_gce.py b/services/nodemanager/tests/test_computenode_driver_gce.py
deleted file mode 100644 (file)
index 1446cd2..0000000
+++ /dev/null
@@ -1,252 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import json
-import time
-import unittest
-
-import mock
-
-import arvnodeman.computenode.driver.gce as gce
-from . import testutil
-
-class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
-    TEST_CLASS = gce.ComputeNodeDriver
-
-    def setUp(self):
-        super(GCEComputeNodeDriverTestCase, self).setUp()
-        self.driver_mock().list_images.return_value = [
-            testutil.cloud_object_mock('testimage', selfLink='image-link')]
-        self.driver_mock().ex_list_disktypes.return_value = [
-            testutil.cloud_object_mock(name, selfLink=name + '-link')
-            for name in ['pd-standard', 'pd-ssd', 'local-ssd']]
-        self.driver_mock.reset_mock()
-
-    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
-        create_kwargs.setdefault('image', 'testimage')
-        return super(GCEComputeNodeDriverTestCase, self).new_driver(
-            auth_kwargs, list_kwargs, create_kwargs)
-
-    def test_driver_instantiation(self):
-        kwargs = {'user_id': 'foo'}
-        driver = self.new_driver(auth_kwargs=kwargs)
-        self.assertTrue(self.driver_mock.called)
-        self.assertEqual(kwargs, self.driver_mock.call_args[1])
-
-    def test_create_image_loaded_at_initialization_by_name(self):
-        image_mocks = [testutil.cloud_object_mock(c) for c in 'abc']
-        list_method = self.driver_mock().list_images
-        list_method.return_value = image_mocks
-        driver = self.new_driver(create_kwargs={'image': 'b'})
-        self.assertEqual(1, list_method.call_count)
-
-    def test_create_includes_ping_secret(self):
-        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
-        driver = self.new_driver()
-        driver.create_node(testutil.MockSize(1), arv_node)
-        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
-        self.assertIn('ping_secret=ssshh', metadata.get('arv-ping-url'))
-
-    def test_create_includes_arvados_node_size(self):
-        arv_node = testutil.arvados_node_mock()
-        size = testutil.MockSize(1)
-        driver = self.new_driver()
-        driver.create_node(size, arv_node)
-        create_method = self.driver_mock().create_node
-        self.assertIn(
-            ('arvados_node_size', size.id),
-            create_method.call_args[1].get('ex_metadata', {'metadata':'missing'}).items()
-        )
-
-    def test_create_raises_but_actually_succeeded(self):
-        arv_node = testutil.arvados_node_mock(1, hostname=None)
-        driver = self.new_driver()
-        nodelist = [testutil.cloud_node_mock(1)]
-        nodelist[0].name = 'compute-000000000000001-zzzzz'
-        self.driver_mock().list_nodes.return_value = nodelist
-        self.driver_mock().create_node.side_effect = IOError
-        n = driver.create_node(testutil.MockSize(1), arv_node)
-        self.assertEqual('compute-000000000000001-zzzzz', n.name)
-
-    def test_create_sets_default_hostname(self):
-        driver = self.new_driver()
-        driver.create_node(testutil.MockSize(1),
-                           testutil.arvados_node_mock(254, hostname=None))
-        create_kwargs = self.driver_mock().create_node.call_args[1]
-        self.assertEqual('compute-0000000000000fe-zzzzz',
-                         create_kwargs.get('name'))
-        self.assertEqual('dynamic.compute.zzzzz.arvadosapi.com',
-                         create_kwargs.get('ex_metadata', {}).get('hostname'))
-
-    def test_create_tags_from_list_tags(self):
-        driver = self.new_driver(list_kwargs={'tags': 'testA, testB'})
-        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
-        self.assertEqual(['testA', 'testB'],
-                         self.driver_mock().create_node.call_args[1]['ex_tags'])
-
-    def test_create_with_two_disks_attached(self):
-        driver = self.new_driver(create_kwargs={'image': 'testimage'})
-        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
-        create_disks = self.driver_mock().create_node.call_args[1].get(
-            'ex_disks_gce_struct', [])
-        self.assertEqual(2, len(create_disks))
-        self.assertTrue(create_disks[0].get('autoDelete'))
-        self.assertTrue(create_disks[0].get('boot'))
-        self.assertEqual('PERSISTENT', create_disks[0].get('type'))
-        init_params = create_disks[0].get('initializeParams', {})
-        self.assertEqual('pd-standard-link', init_params.get('diskType'))
-        self.assertEqual('image-link', init_params.get('sourceImage'))
-        # Our node images expect the SSD to be named `tmp` to find and mount it.
-        self.assertEqual('tmp', create_disks[1].get('deviceName'))
-        self.assertTrue(create_disks[1].get('autoDelete'))
-        self.assertFalse(create_disks[1].get('boot', 'unset'))
-        self.assertEqual('SCRATCH', create_disks[1].get('type'))
-        init_params = create_disks[1].get('initializeParams', {})
-        self.assertEqual('local-ssd-link', init_params.get('diskType'))
-
-    def test_list_nodes_requires_tags_match(self):
-        # A node matches if our list tags are a subset of the node's tags.
-        # Test behavior with no tags, no match, partial matches, different
-        # order, and strict supersets.
-        cloud_mocks = [
-            testutil.cloud_node_mock(node_num, tags=tag_set)
-            for node_num, tag_set in enumerate(
-                [[], ['bad'], ['good'], ['great'], ['great', 'ok'],
-                 ['great', 'good'], ['good', 'fantastic', 'great']])]
-        cloud_mocks.append(testutil.cloud_node_mock())
-        self.driver_mock().list_nodes.return_value = cloud_mocks
-        driver = self.new_driver(list_kwargs={'tags': 'good, great'})
-        self.assertItemsEqual(['5', '6'], [n.id for n in driver.list_nodes()])
-
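# The matching rule asserted above, restated as a minimal sketch: a cloud
# node is listed only when the configured list tags are a subset of the
# node's tags (order and extra node tags don't matter).
def tags_match(list_tags, node_tags):
    return set(list_tags).issubset(node_tags)

assert tags_match(['good', 'great'], ['great', 'good'])               # node 5
assert tags_match(['good', 'great'], ['good', 'fantastic', 'great'])  # node 6
assert not tags_match(['good', 'great'], ['great', 'ok'])             # node 4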
-    def build_gce_metadata(self, metadata_dict):
-        # Convert a plain metadata dictionary to the GCE data structure.
-        return {
-            'kind': 'compute#metadata',
-            'fingerprint': 'testprint',
-            'items': [{'key': key, 'value': metadata_dict[key]}
-                      for key in metadata_dict],
-            }
-
-    def check_sync_node_updates_hostname_tag(self, plain_metadata):
-        start_metadata = self.build_gce_metadata(plain_metadata)
-        arv_node = testutil.arvados_node_mock(1)
-        cloud_node = testutil.cloud_node_mock(
-            2, metadata=start_metadata.copy(),
-            zone=testutil.cloud_object_mock('testzone'))
-        self.driver_mock().ex_get_node.return_value = cloud_node
-        driver = self.new_driver()
-        driver.sync_node(cloud_node, arv_node)
-        args, kwargs = self.driver_mock().ex_set_node_metadata.call_args
-        self.assertEqual(cloud_node, args[0])
-        plain_metadata['hostname'] = 'compute1.zzzzz.arvadosapi.com'
-        self.assertEqual(
-            plain_metadata,
-            {item['key']: item['value'] for item in args[1]})
-
-    def test_sync_node_updates_hostname_tag(self):
-        self.check_sync_node_updates_hostname_tag(
-            {'testkey': 'testvalue', 'hostname': 'startvalue'})
-
-    def test_sync_node_adds_hostname_tag(self):
-        self.check_sync_node_updates_hostname_tag({'testkey': 'testval'})
-
-    def test_sync_node_raises_exception_on_failure(self):
-        arv_node = testutil.arvados_node_mock(8)
-        cloud_node = testutil.cloud_node_mock(
-            9, metadata={}, zone=testutil.cloud_object_mock('failzone'))
-        mock_response = self.driver_mock().ex_set_node_metadata.side_effect = (Exception('sync error test'),)
-        driver = self.new_driver()
-        with self.assertRaises(Exception) as err_check:
-            driver.sync_node(cloud_node, arv_node)
-        self.assertIs(err_check.exception.__class__, Exception)
-        self.assertIn('sync error test', str(err_check.exception))
-
-    def test_node_create_time_zero_for_unknown_nodes(self):
-        node = testutil.cloud_node_mock()
-        self.assertEqual(0, gce.ComputeNodeDriver.node_start_time(node))
-
-    def test_node_create_time_for_known_node(self):
-        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata(
-                {'booted_at': '1970-01-01T00:01:05Z'}))
-        self.assertEqual(65, gce.ComputeNodeDriver.node_start_time(node))
-
-    def test_node_create_time_recorded_when_node_boots(self):
-        start_time = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
-        arv_node = testutil.arvados_node_mock()
-        driver = self.new_driver()
-        driver.create_node(testutil.MockSize(1), arv_node)
-        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
-        self.assertLessEqual(start_time, metadata.get('booted_at'))
-
-    def test_known_node_fqdn(self):
-        name = 'fqdntest.zzzzz.arvadosapi.com'
-        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata(
-                {'hostname': name}))
-        self.assertEqual(name, gce.ComputeNodeDriver.node_fqdn(node))
-
-    def test_unknown_node_fqdn(self):
-        # Return an empty string.  This lets fqdn be safely compared
-        # against an expected value, and ComputeNodeMonitorActor
-        # should try to update it.
-        node = testutil.cloud_node_mock(metadata=self.build_gce_metadata({}))
-        self.assertEqual('', gce.ComputeNodeDriver.node_fqdn(node))
-
-    def test_deliver_ssh_key_in_metadata(self):
-        test_ssh_key = 'ssh-rsa-foo'
-        arv_node = testutil.arvados_node_mock(1)
-        with mock.patch('__builtin__.open',
-                        mock.mock_open(read_data=test_ssh_key)) as mock_file:
-            driver = self.new_driver(create_kwargs={'ssh_key': 'ssh-key-file'})
-        mock_file.assert_called_once_with('ssh-key-file')
-        driver.create_node(testutil.MockSize(1), arv_node)
-        metadata = self.driver_mock().create_node.call_args[1]['ex_metadata']
-        self.assertEqual('root:ssh-rsa-foo', metadata.get('sshKeys'))
-
-    def test_create_driver_with_service_accounts(self):
-        service_accounts = {'email': 'foo@bar', 'scopes': ['storage-full']}
-        srv_acct_config = {'service_accounts': json.dumps(service_accounts)}
-        arv_node = testutil.arvados_node_mock(1)
-        driver = self.new_driver(create_kwargs=srv_acct_config)
-        driver.create_node(testutil.MockSize(1), arv_node)
-        self.assertEqual(
-            service_accounts,
-            self.driver_mock().create_node.call_args[1]['ex_service_accounts'])
-
-    def test_fix_string_size(self):
-        # As of 0.18, the libcloud GCE driver sets node.size to the size's name.
-        # It's supposed to be the actual size object.  Make sure our driver
-        # patches that up in listings.
-        size = testutil.MockSize(2)
-        node = testutil.cloud_node_mock(size=size)
-        node.size = size.id
-        self.driver_mock().list_sizes.return_value = [size]
-        self.driver_mock().list_nodes.return_value = [node]
-        driver = self.new_driver()
-        nodelist = driver.list_nodes()
-        self.assertEqual(1, len(nodelist))
-        self.assertIs(node, nodelist[0])
-        self.assertIs(size, nodelist[0].size)
-
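# A sketch of the patch-up the two tests above exercise (assumed logic,
# with illustrative names): when libcloud hands back node.size as the
# size's string id, swap in the real NodeSize from list_sizes().
def fix_node_sizes(nodes, sizes):
    by_id = dict((sz.id, sz) for sz in sizes)
    for node in nodes:
        if not hasattr(node.size, 'id'):  # size came back as a bare id
            node.size = by_id.get(node.size, node.size)
    return nodes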
-    def test_skip_fix_when_size_not_string(self):
-        # Ensure we don't monkeypatch node sizes unless we need to.
-        size = testutil.MockSize(3)
-        node = testutil.cloud_node_mock(size=size)
-        self.driver_mock().list_nodes.return_value = [node]
-        driver = self.new_driver()
-        nodelist = driver.list_nodes()
-        self.assertEqual(1, len(nodelist))
-        self.assertIs(node, nodelist[0])
-        self.assertIs(size, nodelist[0].size)
-
-    def test_node_found_after_timeout_has_fixed_size(self):
-        size = testutil.MockSize(4)
-        cloud_node = testutil.cloud_node_mock(size=size.id)
-        self.check_node_found_after_timeout_has_fixed_size(size, cloud_node)
-
-    def test_list_empty_nodes(self):
-        self.driver_mock().list_nodes.return_value = []
-        self.assertEqual([], self.new_driver().list_nodes())
diff --git a/services/nodemanager/tests/test_config.py b/services/nodemanager/tests/test_config.py
deleted file mode 100644 (file)
index 8002b3b..0000000
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import io
-import logging
-import unittest
-
-import arvnodeman.computenode.dispatch as dispatch
-import arvnodeman.computenode.dispatch.slurm as slurm_dispatch
-import arvnodeman.config as nmconfig
-
-class NodeManagerConfigTestCase(unittest.TestCase):
-    TEST_CONFIG = u"""
-[Cloud]
-provider = dummy
-shutdown_windows = 52, 6, 2
-
-[Cloud Credentials]
-creds = dummy_creds
-
-[Cloud List]
-[Cloud Create]
-
-[Size 1]
-cores = 1
-price = 0.8
-
-[Size 1.preemptible]
-instance_type = 1
-preemptible = true
-cores = 1
-price = 0.8
-
-[Logging]
-file = /dev/null
-level = DEBUG
-testlogger = INFO
-"""
-
-    def load_config(self, config=None, config_str=None):
-        if config is None:
-            config = nmconfig.NodeManagerConfig()
-        if config_str is None:
-            config_str = self.TEST_CONFIG
-        with io.StringIO(config_str) as config_fp:
-            config.readfp(config_fp)
-        return config
-
-    def test_seeded_defaults(self):
-        config = nmconfig.NodeManagerConfig()
-        sec_names = set(config.sections())
-        self.assertIn('Arvados', sec_names)
-        self.assertIn('Daemon', sec_names)
-        self.assertFalse(any(name.startswith('Size ') for name in sec_names))
-
-    def test_list_sizes(self):
-        config = self.load_config()
-        sizes = config.node_sizes()
-        self.assertEqual(2, len(sizes))
-        size, kwargs = sizes[0]
-        self.assertEqual('Small', size.name)
-        self.assertEqual(1, kwargs['cores'])
-        self.assertEqual(0.8, kwargs['price'])
-        # preemptible is False by default
-        self.assertEqual(False, kwargs['preemptible'])
-        # instance_type == arvados node size id by default
-        self.assertEqual(kwargs['id'], kwargs['instance_type'])
-        # Now retrieve the preemptible version
-        size, kwargs = sizes[1]
-        self.assertEqual('Small', size.name)
-        self.assertEqual('1.preemptible', kwargs['id'])
-        self.assertEqual(1, kwargs['cores'])
-        self.assertEqual(0.8, kwargs['price'])
-        self.assertEqual(True, kwargs['preemptible'])
-        self.assertEqual('1', kwargs['instance_type'])
-
-
-    def test_default_node_mem_scaling(self):
-        config = self.load_config()
-        self.assertEqual(0.95, config.getfloat('Daemon', 'node_mem_scaling'))
-
-    def test_shutdown_windows(self):
-        config = self.load_config()
-        self.assertEqual([52, 6, 2], config.shutdown_windows())
-
-    def test_log_levels(self):
-        config = self.load_config()
-        self.assertEqual({'level': logging.DEBUG,
-                          'testlogger': logging.INFO},
-                         config.log_levels())
-
-    def check_dispatch_classes(self, config, module):
-        setup, shutdown, update, monitor = config.dispatch_classes()
-        self.assertIs(setup, module.ComputeNodeSetupActor)
-        self.assertIs(shutdown, module.ComputeNodeShutdownActor)
-        self.assertIs(update, module.ComputeNodeUpdateActor)
-        self.assertIs(monitor, module.ComputeNodeMonitorActor)
-
-    def test_default_dispatch(self):
-        config = self.load_config()
-        self.check_dispatch_classes(config, dispatch)
-
-    def test_custom_dispatch(self):
-        config = self.load_config(
-            config_str=self.TEST_CONFIG + "[Daemon]\ndispatcher=slurm\n")
-        self.check_dispatch_classes(config, slurm_dispatch)
diff --git a/services/nodemanager/tests/test_daemon.py b/services/nodemanager/tests/test_daemon.py
deleted file mode 100644 (file)
index 1b6e4ca..0000000
+++ /dev/null
@@ -1,858 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import time
-import unittest
-
-import mock
-import pykka
-
-import arvnodeman.daemon as nmdaemon
-import arvnodeman.status as status
-from arvnodeman.jobqueue import ServerCalculator
-from arvnodeman.computenode.dispatch import ComputeNodeMonitorActor
-from . import testutil
-from . import test_status
-from . import pykka_timeout
-import logging
-
-class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
-                                     unittest.TestCase):
-
-    def assertwait(self, f, timeout=pykka_timeout*2):
-        deadline = time.time() + timeout
-        while True:
-            try:
-                return f()
-            except AssertionError:
-                if time.time() > deadline:
-                    raise
-                pass
-            time.sleep(.1)
-            self.daemon.ping().get(self.TIMEOUT)
-
-    def busywait(self, f):
-        for n in xrange(200):
-            ok = f()
-            if ok:
-                return
-            time.sleep(.1)
-            self.daemon.ping().get(self.TIMEOUT)
-        self.assertTrue(ok) # ok is falsy here (we timed out), though not necessarily the literal False
-
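# Both helpers above follow one pattern: poll a condition, pinging the
# daemon actor between attempts so its mailbox keeps draining while the
# test waits. A generic form of that loop, for illustration only:
import time

def poll_until(condition, ping, timeout, interval=.1):
    deadline = time.time() + timeout
    while not condition():
        if time.time() > deadline:
            raise AssertionError("condition not met before deadline")
        time.sleep(interval)
        ping()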
-    def mock_node_start(self, **kwargs):
-        # Make sure that every time the daemon starts a setup actor,
-        # it gets a new mock object back.
-        get_cloud_size = mock.MagicMock()
-        get_cloud_size.get.return_value = kwargs["cloud_size"]
-        mock_actor = mock.MagicMock()
-        mock_proxy = mock.NonCallableMock(name='setup_mock_proxy',
-                                          cloud_size=get_cloud_size,
-                                          actor_ref=mock_actor)
-        mock_actor.proxy.return_value = mock_proxy
-        mock_actor.tell_proxy.return_value = mock_proxy
-
-        self.last_setup = mock_proxy
-        return mock_actor
-
-    def mock_node_shutdown(self, **kwargs):
-        # Make sure that every time the daemon starts a shutdown actor,
-        # it gets a new mock object back.
-        get_cloud_node = mock.MagicMock()
-        if "node_monitor" in kwargs:
-            get_cloud_node.get.return_value = kwargs["node_monitor"].proxy().cloud_node.get()
-        mock_actor = mock.MagicMock()
-        mock_proxy = mock.NonCallableMock(name='shutdown_mock_proxy',
-                                          cloud_node=get_cloud_node,
-                                          actor_ref=mock_actor)
-
-        mock_actor.proxy.return_value = mock_proxy
-        self.last_shutdown = mock_proxy
-
-        return mock_actor
-
-    def make_daemon(self, cloud_nodes=[], arvados_nodes=[], want_sizes=[],
-                    avail_sizes=None,
-                    min_nodes=0, max_nodes=8,
-                    shutdown_windows=[54, 5, 1],
-                    max_total_price=None):
-        for name in ['cloud_nodes', 'arvados_nodes', 'server_wishlist']:
-            setattr(self, name + '_poller', mock.MagicMock(name=name + '_mock'))
-
-        if not avail_sizes:
-            if cloud_nodes or want_sizes:
-                avail_sizes=[(c.size, {"cores": int(c.id)}) for c in cloud_nodes] + [(s, {"cores": 1}) for s in want_sizes]
-            else:
-                avail_sizes=[(testutil.MockSize(1), {"cores": 1})]
-
-        self.arv_factory = mock.MagicMock(name='arvados_mock')
-        api_client = mock.MagicMock(name='api_client')
-        api_client.nodes().create().execute.side_effect = \
-            [testutil.arvados_node_mock(1),
-             testutil.arvados_node_mock(2)]
-        self.arv_factory.return_value = api_client
-
-        self.cloud_factory = mock.MagicMock(name='cloud_mock')
-        self.cloud_factory().node_start_time.return_value = time.time()
-        self.cloud_updates = mock.MagicMock(name='updates_mock')
-        self.timer = testutil.MockTimer(deliver_immediately=False)
-        self.cloud_factory().node_id.side_effect = lambda node: node.id
-        self.cloud_factory().broken.return_value = False
-
-        self.node_setup = mock.MagicMock(name='setup_mock')
-        self.node_setup.start.side_effect = self.mock_node_start
-        self.node_setup.reset_mock()
-
-        self.node_shutdown = mock.MagicMock(name='shutdown_mock')
-        self.node_shutdown.start.side_effect = self.mock_node_shutdown
-
-        self.daemon = nmdaemon.NodeManagerDaemonActor.start(
-            self.server_wishlist_poller, self.arvados_nodes_poller,
-            self.cloud_nodes_poller, self.cloud_updates, self.timer,
-            self.arv_factory, self.cloud_factory,
-            shutdown_windows, ServerCalculator(avail_sizes),
-            min_nodes, max_nodes, 600, 1800, 3600,
-            self.node_setup, self.node_shutdown,
-            max_total_price=max_total_price).proxy()
-        if arvados_nodes is not None:
-            self.daemon.update_arvados_nodes(arvados_nodes).get(self.TIMEOUT)
-        if cloud_nodes is not None:
-            self.daemon.update_cloud_nodes(cloud_nodes).get(self.TIMEOUT)
-        if want_sizes is not None:
-            self.daemon.update_server_wishlist(want_sizes).get(self.TIMEOUT)
-
-    def monitor_list(self):
-        return [c.actor.actor_ref for c in self.daemon.cloud_nodes.get(self.TIMEOUT).nodes.values() if c.actor]
-
-    def monitored_arvados_nodes(self, include_unpaired=True):
-        pairings = []
-        for future in [actor.proxy().arvados_node
-                       for actor in self.monitor_list()]:
-            try:
-                g = future.get(self.TIMEOUT)
-                if g or include_unpaired:
-                    pairings.append(g)
-            except pykka.ActorDeadError:
-                pass
-        return pairings
-
-    def alive_monitor_count(self):
-        return len(self.monitored_arvados_nodes())
-
-    def paired_monitor_count(self):
-        return len(self.monitored_arvados_nodes(False))
-
-    def assertShutdownCancellable(self, expected=True):
-        self.assertTrue(self.node_shutdown.start.called)
-        self.assertIs(expected,
-                      self.node_shutdown.start.call_args[1]['cancellable'],
-                      "ComputeNodeShutdownActor incorrectly cancellable")
-
-    def test_easy_node_creation(self):
-        size = testutil.MockSize(1)
-        self.make_daemon(want_sizes=[size])
-        self.busywait(lambda: self.node_setup.start.called)
-        self.assertIn('node_quota', status.tracker._latest)
-
-    def check_monitors_arvados_nodes(self, *arv_nodes):
-        self.assertwait(lambda: self.assertItemsEqual(arv_nodes, self.monitored_arvados_nodes()))
-
-    def test_node_pairing(self):
-        cloud_node = testutil.cloud_node_mock(1)
-        arv_node = testutil.arvados_node_mock(1)
-        self.make_daemon([cloud_node], [arv_node])
-        self.check_monitors_arvados_nodes(arv_node)
-
-    def test_node_pairing_after_arvados_update(self):
-        cloud_node = testutil.cloud_node_mock(2)
-        self.make_daemon([cloud_node],
-                         [testutil.arvados_node_mock(1, ip_address=None)])
-        arv_node = testutil.arvados_node_mock(2)
-        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
-        self.check_monitors_arvados_nodes(arv_node)
-
-    def test_arvados_node_un_and_re_paired(self):
-        # We need to create the Arvados node mock after spinning up the daemon
-        # to make sure it's new enough to pair with the cloud node.
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(3)],
-                         arvados_nodes=None)
-        arv_node = testutil.arvados_node_mock(3)
-        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
-        self.check_monitors_arvados_nodes(arv_node)
-        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.busywait(lambda: 0 == self.alive_monitor_count())
-        self.daemon.update_cloud_nodes([testutil.cloud_node_mock(3)])
-        self.check_monitors_arvados_nodes(arv_node)
-
-    def test_old_arvados_node_not_double_assigned(self):
-        arv_node = testutil.arvados_node_mock(3, age=9000)
-        size = testutil.MockSize(3)
-        self.make_daemon(arvados_nodes=[arv_node],
-                         avail_sizes=[(size, {"cores":1})])
-        self.daemon.update_server_wishlist([size]).get(self.TIMEOUT)
-        self.daemon.update_server_wishlist([size, size]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        used_nodes = [call[1].get('arvados_node')
-                      for call in self.node_setup.start.call_args_list]
-        self.assertEqual(2, len(used_nodes))
-        self.assertIn(arv_node, used_nodes)
-        self.assertIn(None, used_nodes)
-
-    def test_node_count_satisfied(self):
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1)],
-                         want_sizes=[testutil.MockSize(1)])
-        self.busywait(lambda: not self.node_setup.start.called)
-
-    def test_select_stale_node_records_with_slot_numbers_first(self):
-        """
-        Stale node records with slot_number assigned can exist when
-        clean_arvados_node() isn't executed after a node shutdown, for
-        various reasons.
-        NodeManagerDaemonActor should reuse these stale records first, so
-        that they don't accumulate unused and reduce the available slots.
-        """
-        size = testutil.MockSize(1)
-        a_long_time_ago = '1970-01-01T01:02:03.04050607Z'
-        arvados_nodes = []
-        for n in range(9):
-            # Add several stale node records without slot_number assigned
-            arvados_nodes.append(
-                testutil.arvados_node_mock(
-                    n+1,
-                    slot_number=None,
-                    modified_at=a_long_time_ago))
-        # Add one record with slot_number assigned; it should be the
-        # first one selected
-        arv_node = testutil.arvados_node_mock(
-            123,
-            modified_at=a_long_time_ago)
-        arvados_nodes.append(arv_node)
-        cloud_node = testutil.cloud_node_mock(125, size=size)
-        self.make_daemon(cloud_nodes=[cloud_node],
-                         arvados_nodes=arvados_nodes)
-        arvados_nodes_tracker = self.daemon.arvados_nodes.get()
-        # Here, find_stale_node() should return the node record with
-        # the slot_number assigned.
-        self.assertEqual(arv_node,
-                         arvados_nodes_tracker.find_stale_node(3601))
-
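# The preference find_stale_node() is expected to show above, restated
# with hypothetical names (simplified; staleness checks elided): records
# that still hold a slot_number are recycled first so slots don't leak.
def pick_stale_record(stale_records):
    with_slot = [r for r in stale_records
                 if r.get('slot_number') is not None]
    return (with_slot or stale_records or [None])[0]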
-    def test_dont_count_missing_as_busy(self):
-        size = testutil.MockSize(1)
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),
-                                      testutil.cloud_node_mock(2, size=size)],
-                         arvados_nodes=[testutil.arvados_node_mock(1),
-                                        testutil.arvados_node_mock(
-                                            2,
-                                            last_ping_at='1970-01-01T01:02:03.04050607Z')],
-                         want_sizes=[size, size])
-        self.busywait(lambda: 2 == self.alive_monitor_count())
-        self.busywait(lambda: self.node_setup.start.called)
-
-    def test_missing_counts_towards_max(self):
-        size = testutil.MockSize(1)
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, size=size),
-                                      testutil.cloud_node_mock(2, size=size)],
-                         arvados_nodes=[testutil.arvados_node_mock(1),
-                                        testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
-                         want_sizes=[size, size],
-                         max_nodes=2)
-        self.busywait(lambda: not self.node_setup.start.called)
-
-    def test_excess_counts_missing(self):
-        size = testutil.MockSize(1)
-        cloud_nodes = [testutil.cloud_node_mock(1, size=size), testutil.cloud_node_mock(2, size=size)]
-        self.make_daemon(cloud_nodes=cloud_nodes,
-                         arvados_nodes=[testutil.arvados_node_mock(1),
-                                        testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
-                         want_sizes=[size])
-        self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
-        for mon_ref in self.monitor_list():
-            self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
-        self.assertEqual(1, self.node_shutdown.start.call_count)
-
-    def test_missing_shutdown_not_excess(self):
-        size = testutil.MockSize(1)
-        cloud_nodes = [testutil.cloud_node_mock(1, size=size), testutil.cloud_node_mock(2, size=size)]
-        self.make_daemon(cloud_nodes=cloud_nodes,
-                         arvados_nodes=[testutil.arvados_node_mock(1),
-                                        testutil.arvados_node_mock(2, last_ping_at='1970-01-01T01:02:03.04050607Z')],
-                         want_sizes=[size])
-        self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
-        get_cloud_node = mock.MagicMock(name="get_cloud_node")
-        get_cloud_node.get.return_value = cloud_nodes[1]
-        mock_node_monitor = mock.MagicMock()
-        mock_node_monitor.proxy.return_value = mock.NonCallableMock(cloud_node=get_cloud_node)
-        mock_shutdown = self.node_shutdown.start(node_monitor=mock_node_monitor)
-
-        self.daemon.cloud_nodes.get()[cloud_nodes[1].id].shutdown_actor = mock_shutdown.proxy()
-
-        self.assertwait(lambda: self.assertEqual(2, self.alive_monitor_count()))
-        for mon_ref in self.monitor_list():
-            self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
-        self.busywait(lambda: 1 == self.node_shutdown.start.call_count)
-
-    def test_booting_nodes_counted(self):
-        cloud_node = testutil.cloud_node_mock(1)
-        arv_node = testutil.arvados_node_mock(1)
-        server_wishlist = [testutil.MockSize(1)] * 2
-        self.make_daemon([cloud_node], [arv_node], server_wishlist)
-        self.daemon.max_nodes.get(self.TIMEOUT)
-        self.assertTrue(self.node_setup.start.called)
-        self.daemon.update_server_wishlist(server_wishlist).get(self.TIMEOUT)
-        self.busywait(lambda: 1 == self.node_setup.start.call_count)
-
-    def test_boot_new_node_when_all_nodes_busy(self):
-        size = testutil.MockSize(2)
-        arv_node = testutil.arvados_node_mock(2, job_uuid=True)
-        self.make_daemon([testutil.cloud_node_mock(2, size=size)], [arv_node],
-                         [size], avail_sizes=[(size, {"cores":1})])
-        self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
-        self.assertwait(lambda: self.assertEqual(1, self.node_setup.start.called))
-
-    def test_boot_new_node_below_min_nodes(self):
-        min_size = testutil.MockSize(1)
-        wish_size = testutil.MockSize(3)
-        avail_sizes = [(min_size, {"cores": 1}),
-                       (wish_size, {"cores": 3})]
-        self.make_daemon([], [], None, avail_sizes=avail_sizes, min_nodes=2)
-        self.daemon.update_server_wishlist([wish_size]).get(self.TIMEOUT)
-        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.daemon.update_server_wishlist([wish_size]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual([wish_size, min_size],
-                         [call[1].get('cloud_size')
-                          for call in self.node_setup.start.call_args_list])
-
-    def test_no_new_node_when_ge_min_nodes_busy(self):
-        size = testutil.MockSize(2)
-        cloud_nodes = [testutil.cloud_node_mock(n, size=size) for n in range(1, 4)]
-        arv_nodes = [testutil.arvados_node_mock(n, job_uuid=True)
-                     for n in range(1, 4)]
-        self.make_daemon(cloud_nodes, arv_nodes, [], min_nodes=2)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(0, self.node_setup.start.call_count)
-
-    def test_no_new_node_when_max_nodes_busy(self):
-        size = testutil.MockSize(3)
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(3)],
-                         arvados_nodes=[testutil.arvados_node_mock(3, job_uuid=True)],
-                         want_sizes=[size],
-                         max_nodes=1)
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_setup.start.called)
-
-    def start_node_boot(self, cloud_node=None, arv_node=None, id_num=1):
-        if cloud_node is None:
-            cloud_node = testutil.cloud_node_mock(id_num)
-        id_num = int(cloud_node.id)
-        if arv_node is None:
-            arv_node = testutil.arvados_node_mock(id_num)
-        self.make_daemon(want_sizes=[testutil.MockSize(id_num)],
-                         avail_sizes=[(testutil.MockSize(id_num), {"cores":1})])
-        self.daemon.max_nodes.get(self.TIMEOUT)
-        self.assertEqual(1, self.node_setup.start.call_count)
-        self.last_setup.cloud_node.get.return_value = cloud_node
-        self.last_setup.arvados_node.get.return_value = arv_node
-        return self.last_setup
-
-    def test_new_node_when_booted_node_not_usable(self):
-        cloud_node = testutil.cloud_node_mock(4)
-        arv_node = testutil.arvados_node_mock(4, crunch_worker_state='down')
-        setup = self.start_node_boot(cloud_node, arv_node)
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.daemon.update_arvados_nodes([arv_node])
-        self.daemon.update_cloud_nodes([cloud_node])
-        self.monitor_list()[0].proxy().cloud_node_start_time = time.time()-1801
-        self.daemon.update_server_wishlist(
-            [testutil.MockSize(4)]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(2, self.node_setup.start.call_count)
-
-    def test_no_duplication_when_booting_node_listed_fast(self):
-        # Test that we don't start two ComputeNodeMonitorActors when
-        # we learn about a booting node through a listing before we
-        # get the "node up" message from CloudNodeSetupActor.
-        cloud_node = testutil.cloud_node_mock(1)
-        setup = self.start_node_boot(cloud_node)
-        self.daemon.update_cloud_nodes([cloud_node])
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-
-    def test_no_duplication_when_booted_node_listed(self):
-        cloud_node = testutil.cloud_node_mock(2)
-        setup = self.start_node_boot(cloud_node, id_num=2)
-        self.daemon.node_setup_finished(setup)
-        self.daemon.update_cloud_nodes([cloud_node]).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-
-    def test_node_counted_after_boot_with_slow_listing(self):
-        # Test that, after we boot a compute node, we assume it exists
-        # even if it doesn't appear in the listing (e.g., because of delays
-        # propagating tags).
-        setup = self.start_node_boot()
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-
-    def test_booted_unlisted_node_counted(self):
-        setup = self.start_node_boot(id_num=1)
-        self.daemon.node_setup_finished(setup)
-        self.daemon.update_server_wishlist(
-            [testutil.MockSize(1)]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(1, self.node_setup.start.call_count)
-
-    def test_booted_node_can_shutdown(self):
-        setup = self.start_node_boot()
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.update_server_wishlist([])
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_shutdown.start.called,
-                        "daemon did not shut down booted node on offer")
-
-        with test_status.TestServer() as srv:
-            self.assertEqual(0, srv.get_status().get('nodes_unpaired', None))
-            self.assertEqual(1, srv.get_status().get('nodes_shutdown', None))
-            self.assertEqual(0, srv.get_status().get('nodes_wish', None))
-
-    def test_booted_node_lifecycle(self):
-        cloud_node = testutil.cloud_node_mock(6)
-        setup = self.start_node_boot(cloud_node, id_num=6)
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.update_server_wishlist([])
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertShutdownCancellable(True)
-        shutdown = self.node_shutdown.start().proxy()
-        shutdown.cloud_node.get.return_value = cloud_node
-        self.daemon.node_finished_shutdown(shutdown).get(self.TIMEOUT)
-        self.daemon.update_cloud_nodes([])
-        self.assertTrue(shutdown.stop.called,
-                        "shutdown actor not stopped after finishing")
-        self.assertTrue(monitor.actor_ref.actor_stopped.wait(self.TIMEOUT),
-                        "monitor for booted node not stopped after shutdown")
-        self.daemon.update_server_wishlist(
-            [testutil.MockSize(2)]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertTrue(self.node_setup.start.called,
-                        "second node not started after booted node stopped")
-
-    def test_node_disappearing_during_shutdown(self):
-        cloud_node = testutil.cloud_node_mock(6)
-        setup = self.start_node_boot(cloud_node, id_num=6)
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.update_server_wishlist([])
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertShutdownCancellable(True)
-        shutdown = self.node_shutdown.start().proxy()
-        shutdown.cloud_node.get.return_value = cloud_node
-        # Simulate a successful but slow node destroy call: the cloud node
-        # list gets updated before the ShutdownActor finishes.
-        record = self.daemon.cloud_nodes.get().nodes.values()[0]
-        self.assertTrue(record.shutdown_actor is not None)
-        self.daemon.cloud_nodes.get().nodes.clear()
-        self.daemon.node_finished_shutdown(shutdown).get(self.TIMEOUT)
-        self.assertTrue(
-            record.shutdown_actor is not None,
-            "test was ineffective -- failed to simulate the race condition")
-
-    def test_booted_node_shut_down_when_never_listed(self):
-        setup = self.start_node_boot()
-        self.cloud_factory().node_start_time.return_value = time.time() - 3601
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.assertFalse(self.node_shutdown.start.called)
-        now = time.time()
-        self.monitor_list()[0].tell_proxy().consider_shutdown()
-        self.busywait(lambda: self.node_shutdown.start.called)
-        self.assertShutdownCancellable(False)
-
-    def test_booted_node_shut_down_when_never_paired(self):
-        cloud_node = testutil.cloud_node_mock(2)
-        setup = self.start_node_boot(cloud_node)
-        self.cloud_factory().node_start_time.return_value = time.time() - 3601
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.daemon.update_cloud_nodes([cloud_node])
-        self.monitor_list()[0].tell_proxy().consider_shutdown()
-        self.busywait(lambda: self.node_shutdown.start.called)
-        self.assertShutdownCancellable(False)
-
-    def test_booted_node_shut_down_when_never_working(self):
-        cloud_node = testutil.cloud_node_mock(4)
-        arv_node = testutil.arvados_node_mock(4, crunch_worker_state='down')
-        setup = self.start_node_boot(cloud_node, arv_node)
-        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.monitor_list()[0].proxy().cloud_node_start_time = time.time()-3601
-        self.daemon.update_cloud_nodes([cloud_node])
-        self.busywait(lambda: self.node_shutdown.start.called)
-        self.assertShutdownCancellable(False)
-
-    def test_node_that_pairs_not_considered_failed_boot(self):
-        cloud_node = testutil.cloud_node_mock(3)
-        arv_node = testutil.arvados_node_mock(3)
-        setup = self.start_node_boot(cloud_node, arv_node)
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.daemon.update_cloud_nodes([cloud_node])
-        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
-        self.timer.deliver()
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_shutdown.start.called)
-
-    def test_node_that_pairs_busy_not_considered_failed_boot(self):
-        cloud_node = testutil.cloud_node_mock(5)
-        arv_node = testutil.arvados_node_mock(5, job_uuid=True)
-        setup = self.start_node_boot(cloud_node, arv_node)
-        self.daemon.node_setup_finished(setup).get(self.TIMEOUT)
-        self.assertEqual(1, self.alive_monitor_count())
-        self.daemon.update_cloud_nodes([cloud_node])
-        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
-        self.timer.deliver()
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_shutdown.start.called)
-
-    def test_booting_nodes_shut_down(self):
-        self.make_daemon(want_sizes=[testutil.MockSize(1)])
-        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
-        self.busywait(lambda: self.last_setup.stop_if_no_cloud_node.called)
-
-    def test_all_booting_nodes_tried_to_shut_down(self):
-        size = testutil.MockSize(2)
-        self.make_daemon(want_sizes=[size], avail_sizes=[(size, {"cores":1})])
-        self.daemon.max_nodes.get(self.TIMEOUT)
-        setup1 = self.last_setup
-        setup1.stop_if_no_cloud_node().get.return_value = False
-        setup1.stop_if_no_cloud_node.reset_mock()
-        self.daemon.update_server_wishlist([size, size]).get(self.TIMEOUT)
-        self.daemon.max_nodes.get(self.TIMEOUT)
-        self.assertIsNot(setup1, self.last_setup)
-        self.last_setup.stop_if_no_cloud_node().get.return_value = True
-        self.last_setup.stop_if_no_cloud_node.reset_mock()
-        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
-        self.daemon.max_nodes.get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertEqual(1, self.last_setup.stop_if_no_cloud_node.call_count)
-        self.assertTrue(setup1.stop_if_no_cloud_node.called)
-
-    def test_shutdown_declined_at_wishlist_capacity(self):
-        cloud_node = testutil.cloud_node_mock(1)
-        arv_node = testutil.arvados_node_mock(1)
-        size = testutil.MockSize(1)
-        self.make_daemon(cloud_nodes=[cloud_node], arvados_nodes=[arv_node], want_sizes=[size])
-        self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_shutdown.start.called)
-
-    def test_shutdown_declined_below_min_nodes(self):
-        cloud_node = testutil.cloud_node_mock(1)
-        arv_node = testutil.arvados_node_mock(1)
-        self.make_daemon(cloud_nodes=[cloud_node], arvados_nodes=[arv_node], min_nodes=1)
-        self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_shutdown.start.called)
-
-    def test_shutdown_accepted_below_capacity(self):
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
-        self.busywait(lambda: 1 == self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.busywait(lambda: self.node_shutdown.start.called)
-
-    def test_shutdown_declined_when_idle_and_job_queued(self):
-        size = testutil.MockSize(1)
-        cloud_nodes = [testutil.cloud_node_mock(n, size=size) for n in [3, 4]]
-        arv_nodes = [testutil.arvados_node_mock(3, job_uuid=True),
-                     testutil.arvados_node_mock(4, job_uuid=None)]
-        self.make_daemon(cloud_nodes, arv_nodes, [size])
-        self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
-        for mon_ref in self.monitor_list():
-            monitor = mon_ref.proxy()
-            if monitor.cloud_node.get(self.TIMEOUT) is cloud_nodes[-1]:
-                break
-        else:
-            self.fail("monitor for idle node not found")
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        self.assertFalse(self.node_shutdown.start.called)
-
-    def test_node_shutdown_after_cancelled_shutdown(self):
-        cloud_node = testutil.cloud_node_mock(5)
-        self.make_daemon([cloud_node], [testutil.arvados_node_mock(5)])
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.last_shutdown.success.get.return_value = False
-        self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
-        self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
-
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.last_shutdown.success.get.return_value = True
-        self.last_shutdown.stop.side_effect = lambda: monitor.stop()
-        self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
-        self.assertwait(lambda: self.assertEqual(0, self.paired_monitor_count()))
-
-    def test_nodes_shutting_down_replaced_below_max_nodes(self):
-        size = testutil.MockSize(6)
-        cloud_node = testutil.cloud_node_mock(6, size=size)
-        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6, crunch_worker_state='down')],
-                         avail_sizes=[(size, {"cores":1})])
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertTrue(self.node_shutdown.start.called)
-        getmock = mock.MagicMock()
-        getmock.get.return_value = False
-        self.last_shutdown.cancel_shutdown.return_value = getmock
-        self.daemon.update_server_wishlist(
-            [testutil.MockSize(6)]).get(self.TIMEOUT)
-        self.busywait(lambda: self.node_setup.start.called)
-
-    def test_nodes_shutting_down_cancelled(self):
-        size = testutil.MockSize(6)
-        cloud_node = testutil.cloud_node_mock(6, size=size)
-        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6, crunch_worker_state='down')],
-                         avail_sizes=[(size, {"cores":1})])
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertTrue(self.node_shutdown.start.called)
-        self.daemon.update_server_wishlist(
-            [testutil.MockSize(6)]).get(self.TIMEOUT)
-        self.busywait(lambda: self.last_shutdown.cancel_shutdown.called)
-
-    def test_nodes_shutting_down_not_replaced_at_max_nodes(self):
-        cloud_node = testutil.cloud_node_mock(7)
-        self.make_daemon([cloud_node], [testutil.arvados_node_mock(7)],
-                         max_nodes=1)
-        self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.assertTrue(self.node_shutdown.start.called)
-        self.daemon.update_server_wishlist(
-            [testutil.MockSize(7)]).get(self.TIMEOUT)
-        self.busywait(lambda: not self.node_setup.start.called)
-
-    def test_nodes_shutting_down_count_against_excess(self):
-        size = testutil.MockSize(8)
-        cloud_nodes = [testutil.cloud_node_mock(n, size=size) for n in [8, 9]]
-        arv_nodes = [testutil.arvados_node_mock(n, size=size) for n in [8, 9]]
-        self.make_daemon(cloud_nodes, arv_nodes, [size],
-                         avail_sizes=[(size, {"cores":1})])
-        self.assertwait(lambda: self.assertEqual(2, self.paired_monitor_count()))
-        for mon_ref in self.monitor_list():
-            self.daemon.node_can_shutdown(mon_ref.proxy()).get(self.TIMEOUT)
-        self.assertEqual(1, self.node_shutdown.start.call_count)
-
-    def test_clean_shutdown_waits_for_node_setup_finish(self):
-        new_node = self.start_node_boot()
-        new_node.stop_if_no_cloud_node().get.return_value = False
-        new_node.stop_if_no_cloud_node.reset_mock()
-        self.daemon.shutdown().get(self.TIMEOUT)
-        self.assertTrue(new_node.stop_if_no_cloud_node.called)
-        self.daemon.node_setup_finished(new_node).get(self.TIMEOUT)
-        self.assertTrue(new_node.stop.called)
-        self.timer.deliver()
-        self.assertTrue(
-            self.daemon.actor_ref.actor_stopped.wait(self.TIMEOUT))
-
-    def test_wishlist_ignored_after_shutdown(self):
-        new_node = self.start_node_boot()
-        new_node.stop_if_no_cloud_node().get.return_value = False
-        new_node.stop_if_no_cloud_node.reset_mock()
-        self.daemon.shutdown().get(self.TIMEOUT)
-        size = testutil.MockSize(2)
-        self.daemon.update_server_wishlist([size] * 2).get(self.TIMEOUT)
-        self.timer.deliver()
-        self.busywait(lambda: 1 == self.node_setup.start.call_count)
-
-    def test_shutdown_actor_stopped_when_cloud_node_delisted(self):
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.busywait(lambda: 1 == self.last_shutdown.stop.call_count)
-
-    def test_idle_node_disappearing_clears_status_idle_time_counter(self):
-        size = testutil.MockSize(1)
-        status.tracker._idle_nodes = {}
-        cloud_nodes = [testutil.cloud_node_mock(1, size=size)]
-        arv_nodes = [testutil.arvados_node_mock(1, job_uuid=None)]
-        self.make_daemon(cloud_nodes, arv_nodes, [size])
-        self.assertwait(lambda: self.assertEqual(1, self.paired_monitor_count()))
-        for mon_ref in self.monitor_list():
-            monitor = mon_ref.proxy()
-            if monitor.cloud_node.get(self.TIMEOUT) is cloud_nodes[-1]:
-                break
-        else:
-            self.fail("monitor for idle node not found")
-        self.assertEqual(1, status.tracker.get('nodes_idle'))
-        hostname = monitor.arvados_node.get()['hostname']
-        self.assertIn(hostname, status.tracker._idle_nodes)
-        # Simulate the node disappearing from the cloud node list
-        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.busywait(lambda: 0 == self.alive_monitor_count())
-        self.assertNotIn(hostname, status.tracker._idle_nodes)
-
-    def test_shutdown_actor_cleanup_copes_with_dead_actors(self):
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock()])
-        self.assertEqual(1, self.alive_monitor_count())
-        monitor = self.monitor_list()[0].proxy()
-        self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
-        # We're mainly testing that update_cloud_nodes catches and handles
-        # the ActorDeadError.
-        self.last_shutdown.stop.side_effect = pykka.ActorDeadError
-        self.daemon.update_cloud_nodes([]).get(self.TIMEOUT)
-        self.busywait(lambda: 1 == self.last_shutdown.stop.call_count)
-
-    def test_node_create_two_sizes(self):
-        small = testutil.MockSize(1)
-        big = testutil.MockSize(2)
-        avail_sizes = [(testutil.MockSize(1), {"cores":1}),
-                        (testutil.MockSize(2), {"cores":2})]
-        self.make_daemon(want_sizes=[small, small, small, big],
-                         avail_sizes=avail_sizes, max_nodes=4)
-
-        # the daemon runs in another thread, so we need to wait and see
-        # if it does all the work we're expecting it to do before stopping it.
-        self.busywait(lambda: self.node_setup.start.call_count == 4)
-        booting = self.daemon.booting.get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        sizecounts = {a[0].id: 0 for a in avail_sizes}
-        for b in booting.itervalues():
-            sizecounts[b.cloud_size.get().id] += 1
-        logging.info(sizecounts)
-        self.assertEqual(3, sizecounts[small.id])
-        self.assertEqual(1, sizecounts[big.id])
-
-    def test_node_max_nodes_two_sizes(self):
-        small = testutil.MockSize(1)
-        big = testutil.MockSize(2)
-        avail_sizes = [(testutil.MockSize(1), {"cores":1}),
-                        (testutil.MockSize(2), {"cores":2})]
-        self.make_daemon(want_sizes=[small, small, big, small],
-                         avail_sizes=avail_sizes, max_nodes=3)
-
-        # the daemon runs in another thread, so we need to wait and see
-        # if it does all the work we're expecting it to do before stopping it.
-        self.busywait(lambda: self.node_setup.start.call_count == 3)
-        booting = self.daemon.booting.get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        sizecounts = {a[0].id: 0 for a in avail_sizes}
-        for b in booting.itervalues():
-            sizecounts[b.cloud_size.get().id] += 1
-        self.assertEqual(2, sizecounts[small.id])
-        self.assertEqual(1, sizecounts[big.id])
-
-    def test_wishlist_ordering(self):
-        # Check that big nodes aren't prioritized; since #12199 containers are
-        # scheduled on specific node sizes.
-        small = testutil.MockSize(1)
-        big = testutil.MockSize(2)
-        avail_sizes = [(testutil.MockSize(1), {"cores":1}),
-                        (testutil.MockSize(2), {"cores":2})]
-        self.make_daemon(want_sizes=[small, small, small, big],
-                         avail_sizes=avail_sizes, max_nodes=3)
-
-        # the daemon runs in another thread, so we need to wait and see
-        # if it does all the work we're expecting it to do before stopping it.
-        self.busywait(lambda: self.node_setup.start.call_count == 3)
-        booting = self.daemon.booting.get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        sizecounts = {a[0].id: 0 for a in avail_sizes}
-        for b in booting.itervalues():
-            sizecounts[b.cloud_size.get().id] += 1
-        self.assertEqual(3, sizecounts[small.id])
-        self.assertEqual(0, sizecounts[big.id])
-
-    def test_wishlist_reconfigure(self):
-        small = testutil.MockSize(1)
-        big = testutil.MockSize(2)
-        avail_sizes = [(small, {"cores":1}), (big, {"cores":2})]
-
-        self.make_daemon(cloud_nodes=[testutil.cloud_node_mock(1, small),
-                                      testutil.cloud_node_mock(2, small),
-                                      testutil.cloud_node_mock(3, big)],
-                         arvados_nodes=[testutil.arvados_node_mock(1),
-                                        testutil.arvados_node_mock(2),
-                                        testutil.arvados_node_mock(3)],
-                         want_sizes=[small, small, big],
-                         avail_sizes=avail_sizes)
-        self.assertwait(lambda: self.assertEqual(3, self.paired_monitor_count()))
-        self.daemon.update_server_wishlist([small, big, big]).get(self.TIMEOUT)
-
-        self.assertEqual(0, self.node_shutdown.start.call_count)
-
-        for c in self.daemon.cloud_nodes.get().nodes.itervalues():
-            self.daemon.node_can_shutdown(c.actor)
-
-        booting = self.daemon.booting.get()
-        cloud_nodes = self.daemon.cloud_nodes.get()
-
-        self.busywait(lambda: 1 == self.node_setup.start.call_count)
-        self.busywait(lambda: 1 == self.node_shutdown.start.call_count)
-
-        self.stop_proxy(self.daemon)
-
-        # booting a new big node
-        sizecounts = {a[0].id: 0 for a in avail_sizes}
-        for b in booting.itervalues():
-            sizecounts[b.cloud_size.get().id] += 1
-        self.assertEqual(0, sizecounts[small.id])
-        self.assertEqual(1, sizecounts[big.id])
-
-        # shutting down a small node
-        sizecounts = {a[0].id: 0 for a in avail_sizes}
-        for b in cloud_nodes.nodes.itervalues():
-            if b.shutdown_actor is not None:
-                sizecounts[b.cloud_node.size.id] += 1
-        self.assertEqual(1, sizecounts[small.id])
-        self.assertEqual(0, sizecounts[big.id])
-
-    def test_node_max_price(self):
-        small = testutil.MockSize(1)
-        big = testutil.MockSize(2)
-        avail_sizes = [(testutil.MockSize(1), {"cores":1, "price":1}),
-                        (testutil.MockSize(2), {"cores":2, "price":2})]
-        self.make_daemon(want_sizes=[small, small, small, big],
-                         avail_sizes=avail_sizes,
-                         max_nodes=4,
-                         max_total_price=4)
-        # the daemon runs in another thread, so we need to wait and see
-        # if it does all the work we're expecting it to do before stopping it.
-        self.busywait(lambda: self.node_setup.start.call_count == 3)
-        booting = self.daemon.booting.get()
-        self.stop_proxy(self.daemon)
-
-        sizecounts = {a[0].id: 0 for a in avail_sizes}
-        for b in booting.itervalues():
-            sizecounts[b.cloud_size.get().id] += 1
-        logging.info(sizecounts)
-
-        # Booting 3 small nodes and no big node would also partially satisfy
-        # the wishlist and come in under the price cap; however, the way
-        # update_server_wishlist() currently works effectively results in
-        # round-robin creation of one node of each size in the wishlist, so
-        # test for that.
-        self.assertEqual(2, sizecounts[small.id])
-        self.assertEqual(1, sizecounts[big.id])
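
The price-cap assertion above relies on wishlist handling taking one node of each requested size per pass rather than exhausting the wishlist in order. A minimal sketch of that round-robin selection, with hypothetical names and a plain price table (not the deleted daemon code):

    from collections import OrderedDict

    def nodes_to_boot(wishlist, prices, max_total_price):
        # Group the wishlist by size, then boot one node of each size
        # per pass until the next node would exceed the price cap.
        # With wishlist [small, small, small, big], prices
        # {small: 1, big: 2} and a cap of 4, this yields two small
        # nodes and one big one, matching the assertions above.
        remaining = OrderedDict()
        for size in wishlist:
            remaining[size] = remaining.get(size, 0) + 1
        total, booted = 0.0, []
        while True:
            progress = False
            for size, count in remaining.items():
                if count and total + prices[size] <= max_total_price:
                    remaining[size] -= 1
                    total += prices[size]
                    booted.append(size)
                    progress = True
            if not progress:
                return booted
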
diff --git a/services/nodemanager/tests/test_failure.py b/services/nodemanager/tests/test_failure.py
deleted file mode 100644 (file)
index 8bf3ea8..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import errno
-import logging
-import time
-import threading
-import unittest
-
-import mock
-import pykka
-
-from . import testutil
-
-import arvnodeman.baseactor
-import arvnodeman.status as status
-
-class BogusActor(arvnodeman.baseactor.BaseNodeManagerActor):
-    def __init__(self, e, killfunc=None):
-        super(BogusActor, self).__init__(killfunc=killfunc)
-        self.exp = e
-
-    def doStuff(self):
-        raise self.exp
-
-    def ping(self):
-        # Called by WatchdogActorTest, this delay is longer than the test timeout
-        # of 1 second, which should cause the watchdog ping to fail.
-        time.sleep(2)
-        return True
-
-class ActorUnhandledExceptionTest(testutil.ActorTestMixin, unittest.TestCase):
-    def test_fatal_error(self):
-        for e in (MemoryError(), threading.ThreadError(), OSError(errno.ENOMEM, "")):
-            kill_mock = mock.Mock('os.kill')
-            bgact = BogusActor.start(e, killfunc=kill_mock)
-            act_thread = bgact.proxy().get_thread().get()
-            act = bgact.tell_proxy()
-            act.doStuff()
-            act.actor_ref.stop(block=True)
-            act_thread.join()
-            self.assertTrue(kill_mock.called)
-
-    def test_nonfatal_error(self):
-        status.tracker.update({'actor_exceptions': 0})
-        kill_mock = mock.Mock('os.kill')
-        bgact = BogusActor.start(OSError(errno.ENOENT, ""), killfunc=kill_mock)
-        act_thread = bgact.proxy().get_thread().get()
-        act = bgact.tell_proxy()
-        act.doStuff()
-        act.actor_ref.stop(block=True)
-        act_thread.join()
-        self.assertFalse(kill_mock.called)
-        self.assertEqual(1, status.tracker.get('actor_exceptions'))
-
-class WatchdogActorTest(testutil.ActorTestMixin, unittest.TestCase):
-
-    def test_time_timeout(self):
-        kill_mock = mock.Mock('os.kill')
-        act = BogusActor.start(OSError(errno.ENOENT, ""))
-        watch = arvnodeman.baseactor.WatchdogActor.start(1, act, killfunc=kill_mock)
-        time.sleep(1)
-        watch.stop(block=True)
-        act.stop(block=True)
-        self.assertTrue(kill_mock.called)
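
The failure tests above pin down a simple contract: memory- and thread-exhaustion errors are fatal and must take the whole daemon down via the injected killfunc, while any other exception is merely counted for /status.json. A rough sketch of that dispatch, using a hypothetical helper (the deleted base actor wired this into pykka differently):

    import errno
    import os
    import signal
    import threading

    FATAL_ERRNOS = frozenset([errno.ENOMEM])

    def handle_actor_exception(exc, counter_add, killfunc=os.kill):
        # MemoryError, ThreadError, and ENOMEM leave the daemon in an
        # unrecoverable state, so kill the process; everything else is
        # recorded in the actor_exceptions counter and survived.
        fatal = (isinstance(exc, (MemoryError, threading.ThreadError)) or
                 (isinstance(exc, OSError) and exc.errno in FATAL_ERRNOS))
        if fatal:
            killfunc(os.getpid(), signal.SIGKILL)
        else:
            counter_add('actor_exceptions')
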
diff --git a/services/nodemanager/tests/test_jobqueue.py b/services/nodemanager/tests/test_jobqueue.py
deleted file mode 100644 (file)
index de83b68..0000000
+++ /dev/null
@@ -1,239 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import unittest
-import mock
-
-import arvnodeman.jobqueue as jobqueue
-from . import testutil
-
-class ServerCalculatorTestCase(unittest.TestCase):
-    def make_calculator(self, factors, **kwargs):
-        return jobqueue.ServerCalculator(
-            [(testutil.MockSize(n), {'cores': n}) for n in factors], **kwargs)
-
-    def calculate(self, servcalc, *constraints):
-        return servcalc.servers_for_queue(
-            [{'uuid': 'zzzzz-jjjjj-{:015x}'.format(index),
-              'runtime_constraints': cdict}
-             for index, cdict in enumerate(constraints)])
-
-    def test_empty_queue_needs_no_servers(self):
-        servcalc = self.make_calculator([1])
-        self.assertEqual(([], {}), servcalc.servers_for_queue([]))
-
-    def test_easy_server_count(self):
-        servcalc = self.make_calculator([1])
-        servlist, _ = self.calculate(servcalc, {'min_nodes': 3})
-        self.assertEqual(3, len(servlist))
-
-    def test_default_5pct_ram_value_decrease(self):
-        servcalc = self.make_calculator([1])
-        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 128})
-        self.assertEqual(0, len(servlist))
-        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 121})
-        self.assertEqual(1, len(servlist))
-
-    def test_custom_node_mem_scaling_factor(self):
-        # Simulate a custom 'node_mem_scaling' config parameter by passing
-        # the value to ServerCalculator
-        servcalc = self.make_calculator([1], node_mem_scaling=0.5)
-        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 128})
-        self.assertEqual(0, len(servlist))
-        servlist, _ = self.calculate(servcalc, {'min_ram_mb_per_node': 64})
-        self.assertEqual(1, len(servlist))
-
-    def test_implicit_server_count(self):
-        servcalc = self.make_calculator([1])
-        servlist, _ = self.calculate(servcalc, {}, {'min_nodes': 3})
-        self.assertEqual(4, len(servlist))
-
-    def test_bad_min_nodes_override(self):
-        servcalc = self.make_calculator([1])
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_nodes': -2}, {'min_nodes': 'foo'})
-        self.assertEqual(2, len(servlist))
-
-    def test_ignore_and_return_unsatisfiable_jobs(self):
-        servcalc = self.make_calculator([1], max_nodes=9)
-        servlist, u_jobs = self.calculate(servcalc,
-                                          {'min_cores_per_node': 2},
-                                          {'min_ram_mb_per_node': 256},
-                                          {'min_nodes': 6},
-                                          {'min_nodes': 12},
-                                          {'min_scratch_mb_per_node': 300000})
-        self.assertEqual(6, len(servlist))
-        # Only unsatisfiable jobs are returned on u_jobs
-        self.assertIn('zzzzz-jjjjj-000000000000000', u_jobs.keys())
-        self.assertIn('zzzzz-jjjjj-000000000000001', u_jobs.keys())
-        self.assertNotIn('zzzzz-jjjjj-000000000000002', u_jobs.keys())
-        self.assertIn('zzzzz-jjjjj-000000000000003', u_jobs.keys())
-        self.assertIn('zzzzz-jjjjj-000000000000004', u_jobs.keys())
-
-    def test_ignore_too_expensive_jobs(self):
-        servcalc = self.make_calculator([1, 2], max_nodes=12, max_price=6)
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_cores_per_node': 1, 'min_nodes': 6})
-        self.assertEqual(6, len(servlist))
-
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_cores_per_node': 2, 'min_nodes': 6})
-        self.assertEqual(0, len(servlist))
-
-    def test_job_requesting_max_nodes_accepted(self):
-        servcalc = self.make_calculator([1], max_nodes=4)
-        servlist, _ = self.calculate(servcalc, {'min_nodes': 4})
-        self.assertEqual(4, len(servlist))
-
-    def test_cheapest_size(self):
-        servcalc = self.make_calculator([2, 4, 1, 3])
-        self.assertEqual(testutil.MockSize(1), servcalc.cheapest_size())
-
-    def test_next_biggest(self):
-        servcalc = self.make_calculator([1, 2, 4, 8])
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_cores_per_node': 3},
-                                     {'min_cores_per_node': 6})
-        self.assertEqual([servcalc.cloud_sizes[2].id,
-                          servcalc.cloud_sizes[3].id],
-                         [s.id for s in servlist])
-
-    def test_multiple_sizes(self):
-        servcalc = self.make_calculator([1, 2])
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_cores_per_node': 2},
-                                     {'min_cores_per_node': 1},
-                                     {'min_cores_per_node': 1})
-        self.assertEqual([servcalc.cloud_sizes[1].id,
-                          servcalc.cloud_sizes[0].id,
-                          servcalc.cloud_sizes[0].id],
-                         [s.id for s in servlist])
-
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_cores_per_node': 1},
-                                     {'min_cores_per_node': 2},
-                                     {'min_cores_per_node': 1})
-        self.assertEqual([servcalc.cloud_sizes[0].id,
-                          servcalc.cloud_sizes[1].id,
-                          servcalc.cloud_sizes[0].id],
-                         [s.id for s in servlist])
-
-        servlist, _ = self.calculate(servcalc,
-                                     {'min_cores_per_node': 1},
-                                     {'min_cores_per_node': 1},
-                                     {'min_cores_per_node': 2})
-        self.assertEqual([servcalc.cloud_sizes[0].id,
-                          servcalc.cloud_sizes[0].id,
-                          servcalc.cloud_sizes[1].id],
-                         [s.id for s in servlist])
-
-
-
-class JobQueueMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
-                                   unittest.TestCase):
-    TEST_CLASS = jobqueue.JobQueueMonitorActor
-
-
-    class MockCalculator(object):
-        @staticmethod
-        def servers_for_queue(queue):
-            return ([testutil.MockSize(n) for n in queue], {})
-
-
-    class MockCalculatorUnsatisfiableJobs(object):
-        @staticmethod
-        def servers_for_queue(queue):
-            return ([], {k["uuid"]: "Unsatisfiable job mock" for k in queue})
-
-
-    def build_monitor(self, side_effect, *args, **kwargs):
-        super(JobQueueMonitorActorTestCase, self).build_monitor(*args, **kwargs)
-        self.client.jobs().queue().execute.side_effect = side_effect
-
-    @mock.patch("subprocess32.check_call")
-    @mock.patch("subprocess32.check_output")
-    def test_unsatisfiable_jobs(self, mock_squeue, mock_scancel):
-        job_uuid = 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'
-        container_uuid = 'yyyyy-dz642-yyyyyyyyyyyyyyy'
-        mock_squeue.return_value = "1|1024|0|(Resources)|" + container_uuid + "||1234567890\n"
-
-        self.build_monitor([{'items': [{'uuid': job_uuid}]}],
-                           self.MockCalculatorUnsatisfiableJobs(), True, True)
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.monitor.ping().get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.client.jobs().cancel.assert_called_with(uuid=job_uuid)
-        mock_scancel.assert_called_with(['scancel', '--name='+container_uuid])
-
-    @mock.patch("subprocess32.check_output")
-    def test_subscribers_get_server_lists(self, mock_squeue):
-        mock_squeue.return_value = ""
-
-        self.build_monitor([{'items': [1, 2]}], self.MockCalculator(), True, True)
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with([testutil.MockSize(1),
-                                            testutil.MockSize(2)])
-
-    @mock.patch("subprocess32.check_output")
-    def test_squeue_server_list(self, mock_squeue):
-        mock_squeue.return_value = """1|1024|0|(Resources)|zzzzz-dz642-zzzzzzzzzzzzzzy|(null)|1234567890
-2|1024|0|(Resources)|zzzzz-dz642-zzzzzzzzzzzzzzz|(null)|1234567890
-"""
-
-        super(JobQueueMonitorActorTestCase, self).build_monitor(jobqueue.ServerCalculator(
-            [(testutil.MockSize(n), {'cores': n, 'ram': n*1024, 'scratch': n}) for n in range(1, 3)]),
-                                                                True, True)
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with([testutil.MockSize(1),
-                                            testutil.MockSize(2)])
-
-    @mock.patch("subprocess32.check_output")
-    def test_squeue_server_list_suffix(self, mock_squeue):
-        mock_squeue.return_value = """1|1024M|0|(ReqNodeNotAvail, UnavailableNodes:compute123)|zzzzz-dz642-zzzzzzzzzzzzzzy|(null)|1234567890
-1|2G|0|(ReqNodeNotAvail)|zzzzz-dz642-zzzzzzzzzzzzzzz|(null)|1234567890
-"""
-
-        super(JobQueueMonitorActorTestCase, self).build_monitor(jobqueue.ServerCalculator(
-            [(testutil.MockSize(n), {'cores': n, 'ram': n*1024, 'scratch': n}) for n in range(1, 3)]),
-                                                                True, True)
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with([testutil.MockSize(1),
-                                            testutil.MockSize(2)])
-
-    @mock.patch("subprocess32.check_output")
-    def test_squeue_server_list_instancetype_constraint(self, mock_squeue):
-        mock_squeue.return_value = """1|1024|0|(Resources)|zzzzz-dz642-zzzzzzzzzzzzzzy|instancetype=z2.test|1234567890\n"""
-        super(JobQueueMonitorActorTestCase, self).build_monitor(jobqueue.ServerCalculator(
-            [(testutil.MockSize(n), {'cores': n, 'ram': n*1024, 'scratch': n}) for n in range(1, 3)]),
-                                                                True, True)
-        self.monitor.subscribe(self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with([testutil.MockSize(2)])
-
-    def test_coerce_to_mb(self):
-        self.assertEqual(1, jobqueue.JobQueueMonitorActor.coerce_to_mb("1"))
-        self.assertEqual(512, jobqueue.JobQueueMonitorActor.coerce_to_mb("512"))
-        self.assertEqual(512, jobqueue.JobQueueMonitorActor.coerce_to_mb("512M"))
-        self.assertEqual(1024, jobqueue.JobQueueMonitorActor.coerce_to_mb("1024M"))
-        self.assertEqual(1024, jobqueue.JobQueueMonitorActor.coerce_to_mb("1G"))
-        self.assertEqual(1536, jobqueue.JobQueueMonitorActor.coerce_to_mb("1.5G"))
-        self.assertEqual(2048, jobqueue.JobQueueMonitorActor.coerce_to_mb("2G"))
-        self.assertEqual(1025, jobqueue.JobQueueMonitorActor.coerce_to_mb("1025M"))
-        self.assertEqual(1048576, jobqueue.JobQueueMonitorActor.coerce_to_mb("1T"))
-        self.assertEqual(1572864, jobqueue.JobQueueMonitorActor.coerce_to_mb("1.5T"))
-        self.assertEqual(1073741824, jobqueue.JobQueueMonitorActor.coerce_to_mb("1P"))
-        self.assertEqual(1610612736, jobqueue.JobQueueMonitorActor.coerce_to_mb("1.5P"))
-        self.assertEqual(0, jobqueue.JobQueueMonitorActor.coerce_to_mb("0"))
-        self.assertEqual(0, jobqueue.JobQueueMonitorActor.coerce_to_mb("0M"))
-        self.assertEqual(0, jobqueue.JobQueueMonitorActor.coerce_to_mb("0G"))
-
-
-if __name__ == '__main__':
-    unittest.main()
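
The coerce_to_mb assertions fully determine the unit handling: a bare number is already megabytes, and M/G/T/P suffixes scale by powers of 1024 with fractional values truncated to an integer. A sketch consistent with those expectations (the deleted implementation may have been structured differently):

    import re

    _MB_MULTIPLIERS = {'': 1, 'M': 1, 'G': 1024, 'T': 1024 ** 2, 'P': 1024 ** 3}

    def coerce_to_mb(value):
        # "512" -> 512, "1.5G" -> 1536, "1T" -> 1048576, "0M" -> 0
        number, suffix = re.match(r'^([0-9.]+)([MGTP]?)$', value).groups()
        return int(float(number) * _MB_MULTIPLIERS[suffix])
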
diff --git a/services/nodemanager/tests/test_nodelist.py b/services/nodemanager/tests/test_nodelist.py
deleted file mode 100644 (file)
index df31a12..0000000
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import unittest
-import mock
-
-import arvnodeman.nodelist as nodelist
-from libcloud.compute.base import NodeSize
-from . import testutil
-
-class ArvadosNodeListMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
-                                          unittest.TestCase):
-    TEST_CLASS = nodelist.ArvadosNodeListMonitorActor
-
-    def build_monitor(self, side_effect, *args, **kwargs):
-        super(ArvadosNodeListMonitorActorTestCase, self).build_monitor(
-            *args, **kwargs)
-        self.client.nodes().list().execute.side_effect = side_effect
-
-    @mock.patch("subprocess32.check_output")
-    def test_uuid_is_subscription_key(self, sinfo_mock):
-        sinfo_mock.return_value = ""
-        node = testutil.arvados_node_mock()
-        self.build_monitor([{
-            'items': [node],
-            'items_available': 1,
-            'offset': 0
-        }, {
-            'items': [],
-            'items_available': 1,
-            'offset': 1
-        }])
-        self.monitor.subscribe_to(node['uuid'],
-                                  self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with(node)
-        self.assertEqual("down", node["crunch_worker_state"])
-
-    @mock.patch("subprocess32.check_output")
-    def test_update_from_sinfo(self, sinfo_mock):
-        sinfo_mock.return_value = """compute1|idle|instancetype=a1.test
-compute2|alloc|(null)
-notarvados12345|idle|(null)
-"""
-        nodeIdle = testutil.arvados_node_mock(node_num=1)
-        nodeBusy = testutil.arvados_node_mock(node_num=2)
-        nodeMissing = testutil.arvados_node_mock(node_num=99)
-        self.build_monitor([{
-            'items': [nodeIdle, nodeBusy, nodeMissing],
-            'items_available': 1,
-            'offset': 0
-        }, {
-            'items': [],
-            'items_available': 1,
-            'offset': 1
-        }])
-        self.monitor.subscribe_to(nodeMissing['uuid'],
-                                  self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with(nodeMissing)
-
-        self.assertEqual("idle", nodeIdle["crunch_worker_state"])
-        self.assertEqual("busy", nodeBusy["crunch_worker_state"])
-        self.assertEqual("down", nodeMissing["crunch_worker_state"])
-
-        self.assertEqual("instancetype=a1.test", nodeIdle["slurm_node_features"])
-        self.assertEqual("", nodeBusy["slurm_node_features"])
-        self.assertEqual("", nodeMissing["slurm_node_features"])
-
-
-class CloudNodeListMonitorActorTestCase(testutil.RemotePollLoopActorTestMixin,
-                                        unittest.TestCase):
-    TEST_CLASS = nodelist.CloudNodeListMonitorActor
-
-    class MockNode(object):
-        def __init__(self, count):
-            self.id = str(count)
-            self.name = 'test{}.example.com'.format(count)
-            self.private_ips = ['10.0.0.{}'.format(count)]
-            self.public_ips = []
-            self.size = testutil.MockSize(1)
-            self.state = 0
-            self.extra = {'arvados_node_size': self.size.id}
-
-
-    def build_monitor(self, side_effect, *args, **kwargs):
-        super(CloudNodeListMonitorActorTestCase, self).build_monitor(
-            *args, **kwargs)
-        self.client.list_nodes.side_effect = side_effect
-
-    def test_id_is_subscription_key(self):
-        node = self.MockNode(1)
-        mock_calc = mock.MagicMock()
-        mock_calc.find_size.return_value = testutil.MockSize(2)
-        self.build_monitor([[node]], mock_calc)
-        self.monitor.subscribe_to('1', self.subscriber).get(self.TIMEOUT)
-        self.stop_proxy(self.monitor)
-        self.subscriber.assert_called_with(node)
-        self.assertEqual(testutil.MockSize(2), node.size)
-
-if __name__ == '__main__':
-    unittest.main()
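
The sinfo-driven tests above imply a small translation table: SLURM "idle" maps to crunch_worker_state "idle", "alloc" to "busy", and any Arvados node absent from the sinfo output is treated as down, with "(null)" node features normalized to an empty string. A sketch of that annotation step (hypothetical helper, not the deleted monitor code):

    _SLURM_TO_CRUNCH = {'idle': 'idle', 'alloc': 'busy'}

    def annotate_from_sinfo(arv_nodes, sinfo_output):
        # sinfo lines look like "compute1|idle|instancetype=a1.test".
        seen = {}
        for line in sinfo_output.splitlines():
            hostname, state, features = line.split('|')
            seen[hostname] = (state, '' if features == '(null)' else features)
        for node in arv_nodes:
            state, features = seen.get(node['hostname'], ('down', ''))
            node['crunch_worker_state'] = _SLURM_TO_CRUNCH.get(state, 'down')
            node['slurm_node_features'] = features
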
diff --git a/services/nodemanager/tests/test_status.py b/services/nodemanager/tests/test_status.py
deleted file mode 100644 (file)
index 2a1c0fc..0000000
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-from future import standard_library
-
-import json
-import mock
-import random
-import requests
-import unittest
-
-import arvnodeman.status as status
-import arvnodeman.config as config
-
-
-class TestServer(object):
-    def __init__(self, management_token=None):
-        self.mgmt_token = management_token
-
-    def __enter__(self):
-        cfg = config.NodeManagerConfig()
-        cfg.set('Manage', 'port', '0')
-        cfg.set('Manage', 'address', '127.0.0.1')
-        if self.mgmt_token != None:
-            cfg.set('Manage', 'ManagementToken', self.mgmt_token)
-        self.srv = status.Server(cfg)
-        self.srv.start()
-        addr, port = self.srv.server_address
-        self.srv_base = 'http://127.0.0.1:'+str(port)
-        return self
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        self.srv.shutdown()
-
-    def get_status_response(self):
-        return requests.get(self.srv_base+'/status.json')
-
-    def get_status(self):
-        return self.get_status_response().json()
-
-    def get_healthcheck_ping(self, auth_header=None):
-        headers = {}
-        if auth_header != None:
-            headers['Authorization'] = auth_header
-        return requests.get(self.srv_base+'/_health/ping', headers=headers)
-
-class StatusServerUpdates(unittest.TestCase):
-    def test_updates(self):
-        with TestServer() as srv:
-            for n in [1, 2, 3]:
-                status.tracker.update({'nodes_'+str(n): n})
-                r = srv.get_status_response()
-                self.assertEqual(200, r.status_code)
-                self.assertEqual('application/json', r.headers['content-type'])
-                resp = r.json()
-                self.assertEqual(n, resp['nodes_'+str(n)])
-            self.assertEqual(1, resp['nodes_1'])
-            self.assertIn('Version', resp)
-            self.assertIn('config_max_nodes', resp)
-
-    def test_counters(self):
-        with TestServer() as srv:
-            resp = srv.get_status()
-            # Test counter existence
-            for counter in ['list_nodes_errors', 'create_node_errors',
-                'destroy_node_errors', 'boot_failures', 'actor_exceptions']:
-                self.assertIn(counter, resp)
-            # Test counter increment
-            for count in range(1, 3):
-                status.tracker.counter_add('a_counter')
-                resp = srv.get_status()
-                self.assertEqual(count, resp['a_counter'])
-
-    @mock.patch('time.time')
-    def test_idle_times(self, time_mock):
-        with TestServer() as srv:
-            resp = srv.get_status()
-            node_name = 'idle_compute{}'.format(random.randint(1, 1024))
-            self.assertIn('idle_times', resp)
-            # Test add an idle node
-            time_mock.return_value = 10
-            status.tracker.idle_in(node_name)
-            time_mock.return_value += 10
-            resp = srv.get_status()
-            self.assertEqual(10, resp['idle_times'][node_name])
-            # Test adding the same idle node a 2nd time
-            time_mock.return_value += 10
-            status.tracker.idle_in(node_name)
-            time_mock.return_value += 10
-            resp = srv.get_status()
-            # Idle timestamp doesn't get reset if it already exists
-            self.assertEqual(30, resp['idle_times'][node_name])
-            # Test remove idle node
-            status.tracker.idle_out(node_name)
-            resp = srv.get_status()
-            self.assertNotIn(node_name, resp['idle_times'])
-
-
-class StatusServerDisabled(unittest.TestCase):
-    def test_config_disabled(self):
-        cfg = config.NodeManagerConfig()
-        cfg.set('Manage', 'port', '-1')
-        cfg.set('Manage', 'address', '127.0.0.1')
-        self.srv = status.Server(cfg)
-        self.srv.start()
-        self.assertFalse(self.srv.enabled)
-        self.assertFalse(getattr(self.srv, '_thread', False))
-
-class HealthcheckPing(unittest.TestCase):
-    def test_ping_disabled(self):
-        with TestServer() as srv:
-            r = srv.get_healthcheck_ping()
-            self.assertEqual(404, r.status_code)
-
-    def test_ping_no_auth(self):
-        with TestServer('configuredmanagementtoken') as srv:
-            r = srv.get_healthcheck_ping()
-            self.assertEqual(401, r.status_code)
-
-    def test_ping_bad_auth_format(self):
-        with TestServer('configuredmanagementtoken') as srv:
-            r = srv.get_healthcheck_ping('noBearer')
-            self.assertEqual(403, r.status_code)
-
-    def test_ping_bad_auth_token(self):
-        with TestServer('configuredmanagementtoken') as srv:
-            r = srv.get_healthcheck_ping('Bearer badtoken')
-            self.assertEqual(403, r.status_code)
-
-    def test_ping_success(self):
-        with TestServer('configuredmanagementtoken') as srv:
-            r = srv.get_healthcheck_ping('Bearer configuredmanagementtoken')
-            self.assertEqual(200, r.status_code)
-            self.assertEqual('application/json', r.headers['content-type'])
-            resp = r.json()
-            self.assertEqual('{"health": "OK"}', json.dumps(resp))
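
The four ping tests encode a four-way decision: 404 when no management token is configured, 401 when the Authorization header is missing, 403 on a malformed scheme or wrong token, and 200 only on an exact Bearer match. A sketch of that check (hypothetical function; the deleted server implemented this inside its request handler):

    def healthcheck_status(mgmt_token, auth_header):
        # Return the HTTP status /_health/ping should answer with.
        if not mgmt_token:
            return 404      # endpoint disabled: no token configured
        if auth_header is None:
            return 401      # authentication required
        if auth_header != 'Bearer ' + mgmt_token:
            return 403      # wrong scheme or wrong token
        return 200
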
diff --git a/services/nodemanager/tests/test_timedcallback.py b/services/nodemanager/tests/test_timedcallback.py
deleted file mode 100644 (file)
index 21a9b5a..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import time
-import unittest
-
-import mock
-import pykka
-
-import arvnodeman.timedcallback as timedcallback
-from . import testutil
-
-@testutil.no_sleep
-class TimedCallBackActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
-    def test_immediate_turnaround(self):
-        receiver = mock.Mock()
-        deliverer = timedcallback.TimedCallBackActor.start().proxy()
-        deliverer.schedule(time.time() - 1, receiver,
-                           'immediate').get(self.TIMEOUT)
-        self.stop_proxy(deliverer)
-        receiver.assert_called_with('immediate')
-
-    def test_delayed_turnaround(self):
-        receiver = mock.Mock()
-        mock_now = mock.Mock()
-        mock_now.return_value = 0
-        deliverer = timedcallback.TimedCallBackActor.start(timefunc=mock_now).proxy()
-        deliverer.schedule(1, receiver, 'delayed')
-        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-        self.assertFalse(receiver.called)
-        mock_now.return_value = 2
-        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-        self.stop_proxy(deliverer)
-        receiver.assert_called_with('delayed')
-
-    def test_out_of_order_scheduling(self):
-        receiver = mock.Mock()
-        mock_now = mock.Mock()
-        mock_now.return_value = 1.5
-        deliverer = timedcallback.TimedCallBackActor.start(timefunc=mock_now).proxy()
-        deliverer.schedule(2, receiver, 'second')
-        deliverer.schedule(1, receiver, 'first')
-        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-        receiver.assert_called_with('first')
-        mock_now.return_value = 2.5
-        deliverer.schedule(3, receiver, 'failure').get(self.TIMEOUT)
-        self.stop_proxy(deliverer)
-        receiver.assert_called_with('second')
-
-    def test_dead_actors_ignored(self):
-        receiver = mock.Mock(name='dead_actor', spec=pykka.ActorRef)
-        receiver.tell.side_effect = pykka.ActorDeadError
-        deliverer = timedcallback.TimedCallBackActor.start().proxy()
-        deliverer.schedule(time.time() - 1, receiver.tell,
-                           'error').get(self.TIMEOUT)
-        self.assertTrue(self.stop_proxy(deliverer), "deliverer died")
-        receiver.tell.assert_called_with('error')
-
-
-if __name__ == '__main__':
-    unittest.main()
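
These tests drive delivery purely through the injected timefunc: callbacks fire once their deadline is no longer in the future, earlier deadlines fire first regardless of scheduling order, and a receiver that has died is silently skipped. A compact sketch of that core, leaving out the actor plumbing (hypothetical class, not the deleted TimedCallBackActor):

    import heapq
    import time

    import pykka

    class CallbackQueue(object):
        def __init__(self, timefunc=time.time):
            self._timefunc = timefunc
            self._heap = []   # (deadline, seq, callback, message)
            self._seq = 0     # tie-breaker keeps equal deadlines FIFO

        def schedule(self, deadline, callback, message):
            heapq.heappush(self._heap, (deadline, self._seq, callback, message))
            self._seq += 1
            self.deliver_due()

        def deliver_due(self):
            # Fire everything already due; tolerate dead receivers.
            while self._heap and self._heap[0][0] <= self._timefunc():
                _, _, callback, message = heapq.heappop(self._heap)
                try:
                    callback(message)
                except pykka.ActorDeadError:
                    pass
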
diff --git a/services/nodemanager/tests/testutil.py b/services/nodemanager/tests/testutil.py
deleted file mode 100644 (file)
index ee475ef..0000000
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-from __future__ import absolute_import, print_function
-
-import contextlib
-import datetime
-import mock
-import pykka
-import sys
-import threading
-import time
-
-import libcloud.common.types as cloud_types
-
-from . import pykka_timeout
-
-no_sleep = mock.patch('time.sleep', lambda n: None)
-
-def arvados_node_mock(node_num=99, job_uuid=None, age=-1, **kwargs):
-    mod_time = datetime.datetime.utcnow() - datetime.timedelta(seconds=age)
-    mod_time_s = mod_time.strftime('%Y-%m-%dT%H:%M:%S.%fZ')
-    if job_uuid is True:
-        job_uuid = 'zzzzz-jjjjj-jobjobjobjobjob'
-    crunch_worker_state = 'idle' if (job_uuid is None) else 'busy'
-    node = {'uuid': 'zzzzz-yyyyy-{:015x}'.format(node_num),
-            'created_at': '2014-01-01T01:02:03.04050607Z',
-            'modified_at': mod_time_s,
-            'first_ping_at': kwargs.pop('first_ping_at', mod_time_s),
-            'last_ping_at': mod_time_s,
-            'slot_number': node_num,
-            'hostname': 'compute{}'.format(node_num),
-            'domain': 'zzzzz.arvadosapi.com',
-            'ip_address': ip_address_mock(node_num),
-            'job_uuid': job_uuid,
-            'crunch_worker_state': crunch_worker_state,
-            'properties': {},
-            'info': {'ping_secret': 'defaulttestsecret', 'ec2_instance_id': str(node_num)}}
-    node.update(kwargs)
-    return node
-
-def cloud_object_mock(name_id, **extra):
-    # A very generic mock, useful for stubbing libcloud objects we
-    # only search for and pass around, like locations, subnets, etc.
-    cloud_object = mock.NonCallableMagicMock(['id', 'name'],
-                                             name='cloud_object')
-    cloud_object.name = str(name_id)
-    cloud_object.id = 'id_' + cloud_object.name
-    cloud_object.extra = extra
-    return cloud_object
-
-
-def cloud_node_fqdn(node):
-    # We intentionally put the FQDN somewhere goofy to make sure tested code is
-    # using this function for lookups.
-    return node.extra.get('testname', node.name+'.NoTestName.invalid')
-
-def ip_address_mock(last_octet):
-    return '10.20.30.{}'.format(last_octet)
-
-@contextlib.contextmanager
-def redirected_streams(stdout=None, stderr=None):
-    orig_stdout, sys.stdout = sys.stdout, stdout or sys.stdout
-    orig_stderr, sys.stderr = sys.stderr, stderr or sys.stderr
-    try:
-        yield
-    finally:
-        sys.stdout = orig_stdout
-        sys.stderr = orig_stderr
-
-
-class MockShutdownTimer(object):
-    def _set_state(self, is_open, next_opening):
-        self.window_open = lambda: is_open
-        self.next_opening = lambda: next_opening
-
-
-class MockSize(object):
-    def __init__(self, factor, preemptible=False):
-        self.id = 'z{}.test'.format(factor)
-        self.name = 'test size '+self.id
-        self.ram = 128 * factor
-        self.disk = factor   # GB
-        self.scratch = 1000 * factor # MB
-        self.bandwidth = 16 * factor
-        self.price = float(factor)
-        self.extra = {}
-        self.real = self
-        self.preemptible = preemptible
-
-    def __eq__(self, other):
-        return self.id == other.id
-
-
-class MockTimer(object):
-    def __init__(self, deliver_immediately=True):
-        self.deliver_immediately = deliver_immediately
-        self.messages = []
-        self.lock = threading.Lock()
-
-    def deliver(self):
-        with self.lock:
-            to_deliver = self.messages
-            self.messages = []
-        for callback, args, kwargs in to_deliver:
-            try:
-                callback(*args, **kwargs)
-            except pykka.ActorDeadError:
-                pass
-
-    def schedule(self, want_time, callback, *args, **kwargs):
-        with self.lock:
-            self.messages.append((callback, args, kwargs))
-        if self.deliver_immediately:
-            self.deliver()
-
-
-class ActorTestMixin(object):
-    FUTURE_CLASS = pykka.ThreadingFuture
-    TIMEOUT = pykka_timeout
-
-    def tearDown(self):
-        pykka.ActorRegistry.stop_all()
-
-    def stop_proxy(self, proxy):
-        th = proxy.get_thread().get()
-        t = proxy.actor_ref.stop(timeout=self.TIMEOUT)
-        th.join()
-        return t
-
-    def wait_for_assignment(self, proxy, attr_name, unassigned=None,
-                            timeout=TIMEOUT):
-        deadline = time.time() + timeout
-        while True:
-            loop_timeout = deadline - time.time()
-            if loop_timeout <= 0:
-                self.fail("actor did not assign {} in time".format(attr_name))
-            result = getattr(proxy, attr_name).get(loop_timeout)
-            if result is not unassigned:
-                return result
-
-    def busywait(self, f, finalize=None):
-        n = 0
-        while not f() and n < 20:
-            time.sleep(.1)
-            n += 1
-        if finalize is not None:
-            finalize()
-        self.assertTrue(f())
-
-
-class DriverTestMixin(object):
-    def setUp(self):
-        self.driver_mock = mock.MagicMock(name='driver_mock')
-        super(DriverTestMixin, self).setUp()
-
-    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
-        create_kwargs.setdefault('ping_host', '100::')
-        return self.TEST_CLASS(
-            auth_kwargs, list_kwargs, create_kwargs,
-            driver_class=self.driver_mock)
-
-    def driver_method_args(self, method_name):
-        return getattr(self.driver_mock(), method_name).call_args
-
-    def test_driver_create_retry(self):
-        with mock.patch('time.sleep'):
-            driver_mock2 = mock.MagicMock(name='driver_mock2')
-            self.driver_mock.side_effect = (Exception("oops"), driver_mock2)
-            kwargs = {'user_id': 'foo'}
-            driver = self.new_driver(auth_kwargs=kwargs)
-            self.assertTrue(self.driver_mock.called)
-            self.assertIs(driver.real, driver_mock2)
-
-    def test_create_can_find_node_after_timeout(self, create_kwargs={}, node_extra={}):
-        driver = self.new_driver(create_kwargs=create_kwargs)
-        arv_node = arvados_node_mock()
-        cloud_node = cloud_node_mock(**node_extra)
-        cloud_node.name = driver.create_cloud_name(arv_node)
-        create_method = self.driver_mock().create_node
-        create_method.side_effect = cloud_types.LibcloudError("fake timeout")
-        list_method = self.driver_mock().list_nodes
-        list_method.return_value = [cloud_node]
-        actual = driver.create_node(MockSize(1), arv_node)
-        self.assertIs(cloud_node, actual)
-
-    def test_create_can_raise_exception_after_timeout(self):
-        driver = self.new_driver()
-        arv_node = arvados_node_mock()
-        create_method = self.driver_mock().create_node
-        create_method.side_effect = cloud_types.LibcloudError("fake timeout")
-        list_method = self.driver_mock().list_nodes
-        list_method.return_value = []
-        with self.assertRaises(cloud_types.LibcloudError) as exc_test:
-            driver.create_node(MockSize(1), arv_node)
-        self.assertIs(create_method.side_effect, exc_test.exception)
-
-    def check_node_found_after_timeout_has_fixed_size(self, size, cloud_node,
-                                                      create_kwargs={}):
-        # This method needs to be called explicitly by driver test suites
-        # that need it.
-        self.driver_mock().list_sizes.return_value = [size]
-        driver = self.new_driver(create_kwargs=create_kwargs)
-        arv_node = arvados_node_mock()
-        cloud_node.name = driver.create_cloud_name(arv_node)
-        create_method = self.driver_mock().create_node
-        create_method.side_effect = cloud_types.LibcloudError("fake timeout")
-        self.driver_mock().list_nodes.return_value = [cloud_node]
-        actual = driver.create_node(size, arv_node)
-        self.assertIs(size, actual.size)
-
-
-class RemotePollLoopActorTestMixin(ActorTestMixin):
-    def build_monitor(self, *args, **kwargs):
-        self.timer = mock.MagicMock(name='timer_mock')
-        self.client = mock.MagicMock(name='client_mock')
-        self.subscriber = mock.Mock(name='subscriber_mock')
-        self.monitor = self.TEST_CLASS.start(
-            self.client, self.timer, *args, **kwargs).proxy()
-
-def cloud_node_mock(node_num=99, size=None, **extra):
-    if size is None:
-        size = MockSize(node_num)
-    node = mock.NonCallableMagicMock(
-        ['id', 'name', 'state', 'public_ips', 'private_ips', 'driver', 'size',
-         'image', 'extra'],
-        name='cloud_node')
-    node.id = str(node_num)
-    node.name = node.id
-    node.size = size
-    node.public_ips = []
-    node.private_ips = [ip_address_mock(node_num)]
-    node.extra = extra
-    return node
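
Taken together, these fixtures made nodemanager's actor tests deterministic: the node mocks stand in for Arvados and libcloud API objects, MockTimer turns scheduled callbacks into synchronous calls, and busywait polls for effects that cross thread boundaries. A minimal usage sketch, under the assumption that the names above are importable from this module:

    import unittest

    class ExampleTest(ActorTestMixin, unittest.TestCase):
        def test_fixture_defaults(self):
            # An Arvados node mock with no job is idle, and the cloud
            # node mock for the same number shares its identifier.
            arv_node = arvados_node_mock(3)
            cloud_node = cloud_node_mock(3)
            self.assertEqual('idle', arv_node['crunch_worker_state'])
            self.assertEqual('3', cloud_node.id)
            # MockTimer delivers immediately, so busywait returns at once.
            fired = []
            MockTimer().schedule(0, fired.append, 'now')
            self.busywait(lambda: fired == ['now'])
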
index 292a4fd746a6697fafa0bda5155e766bab79618d..5abaa90e36d1cfc60d286a336fb4551b6e1f5ee6 100755 (executable)
@@ -206,7 +206,8 @@ run() {
               --publish=9002:9002
               --publish=25101:25101
               --publish=8001:8001
-              --publish=8002:8002"
+              --publish=8002:8002
+              --publish=45000-45020:45000-45020"
     else
         PUBLIC=""
     fi
index f7fc1a07b401aba074f954044e2bde6ae88ef258..c8db9499cda716240ed8ba421d9f72d496a0bf27 100644 (file)
@@ -1,6 +1,5 @@
 {
   "variables": {
-    "storage_account": null,
     "resource_group": null,
     "client_id": "{{env `ARM_CLIENT_ID`}}",
     "client_secret": "{{env `ARM_CLIENT_SECRET`}}",
       "subscription_id": "{{user `subscription_id`}}",
       "tenant_id": "{{user `tenant_id`}}",
 
-      "resource_group_name": "{{user `resource_group`}}",
-      "storage_account": "{{user `storage_account`}}",
-
-      "capture_container_name": "images",
-      "capture_name_prefix": "{{user `arvados_cluster`}}-compute",
+      "managed_image_resource_group_name": "{{user `resource_group`}}",
+      "managed_image_name": "{{user `arvados_cluster`}}-compute-v{{ timestamp }}",
 
       "ssh_username": "{{user `ssh_user`}}",
       "ssh_private_key_file": "{{user `ssh_private_key_file`}}",
index e8265ae198316659aa56996de20cfa6f6f4612ed..030eb410b8d52fcf7c1e72e2a8be79c0af90bf7d 100755 (executable)
@@ -43,8 +43,6 @@ Options:
       Azure secrets file which will be sourced from this script
   --azure-resource-group (default: false, required if building for Azure)
       Azure resource group
-  --azure-storage-account (default: false, required if building for Azure)
-      Azure storage account
   --azure-location (default: false, required if building for Azure)
       Azure location, e.g. centralus, eastus, westeurope
   --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
@@ -76,7 +74,6 @@ GCP_ACCOUNT_FILE=
 GCP_ZONE=
 AZURE_SECRETS_FILE=
 AZURE_RESOURCE_GROUP=
-AZURE_STORAGE_ACCOUNT=
 AZURE_LOCATION=
 AZURE_CLOUD_ENVIRONMENT=
 DEBUG=
@@ -86,7 +83,7 @@ AWS_DEFAULT_REGION=us-east-1
 PUBLIC_KEY_FILE=
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-storage-account:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
+    help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -139,9 +136,6 @@ while [ $# -gt 0 ]; do
         --azure-resource-group)
             AZURE_RESOURCE_GROUP="$2"; shift
             ;;
-        --azure-storage-account)
-            AZURE_STORAGE_ACCOUNT="$2"; shift
-            ;;
         --azure-location)
             AZURE_LOCATION="$2"; shift
             ;;
@@ -248,9 +242,6 @@ fi
 if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then
   EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP"
 fi
-if [[ "$AZURE_STORAGE_ACCOUNT" != "" ]]; then
-  EXTRA2+=" -var storage_account=$AZURE_STORAGE_ACCOUNT"
-fi
 if [[ "$AZURE_LOCATION" != "" ]]; then
   EXTRA2+=" -var location=$AZURE_LOCATION"
 fi
index 2c49dcae62f1a30be179735e15d42a6b1e148934..463c552c4f1eb5caf0868337858197a747bc8fa8 100644 (file)
@@ -364,7 +364,7 @@ class Summarizer(object):
                 constraint_key,
                 int(used_cores))
 
-    # FIXME: This needs to be updated to account for current nodemanager algorithms
+    # FIXME: This needs to be updated to account for current a-d-c algorithms
     def _recommend_ram(self):
         """Recommend an economical RAM constraint for this job.