Merge branch '15652-crunch-run-memory-use'
author Tom Clegg <tclegg@veritasgenetics.com>
Tue, 22 Oct 2019 20:09:02 +0000 (16:09 -0400)
committer Tom Clegg <tclegg@veritasgenetics.com>
Tue, 22 Oct 2019 20:09:02 +0000 (16:09 -0400)
refs #15652

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

64 files changed:
apps/workbench/Gemfile
apps/workbench/Gemfile.lock
apps/workbench/app/assets/javascripts/components/search.js
apps/workbench/app/assets/javascripts/filterable.js
apps/workbench/app/assets/javascripts/ilike_filters.js [new file with mode: 0644]
apps/workbench/app/assets/javascripts/to_tsquery.js [deleted file]
doc/_config.yml
doc/admin/config-migration.html.textile.liquid
doc/admin/federation.html.textile.liquid
doc/admin/merge-remote-account.html.textile.liquid
doc/admin/upgrade-crunch2.html.textile.liquid
doc/architecture/federation.html.textile.liquid
lib/config/config.default.yml
lib/config/deprecated.go
lib/config/export.go
lib/config/generated_config.go
lib/controller/handler_test.go
lib/dispatchcloud/scheduler/sync.go
lib/dispatchcloud/scheduler/sync_test.go [new file with mode: 0644]
lib/service/cmd.go
sdk/cli/arvados-cli.gemspec
sdk/cwl/arvados_cwl/done.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/tests/federation/arvbox-make-federation.cwl
sdk/cwl/tests/federation/arvbox/fed-config.cwl
sdk/cwl/tests/federation/arvbox/setup-user.cwl
sdk/cwl/tests/federation/arvbox/start.cwl
sdk/cwl/tests/test_container.py
sdk/go/arvados/config.go
sdk/go/httpserver/request_limiter.go
sdk/go/keepclient/discover.go
sdk/python/arvados/arvfile.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/federation_migrate.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/keep.py
sdk/python/arvados/retry.py
sdk/python/tests/fed-migrate/README [new file with mode: 0644]
sdk/python/tests/fed-migrate/arvbox-make-federation.cwl [new file with mode: 0644]
sdk/python/tests/fed-migrate/check.py [new file with mode: 0644]
sdk/python/tests/fed-migrate/create_users.py [new file with mode: 0644]
sdk/python/tests/fed-migrate/fed-migrate.cwl [new file with mode: 0644]
sdk/python/tests/fed-migrate/fed-migrate.cwlex [new file with mode: 0644]
sdk/python/tests/fed-migrate/run-test.cwlex [new file with mode: 0644]
sdk/python/tests/fed-migrate/set_login.py [new file with mode: 0644]
sdk/python/tests/fed-migrate/superuser-tok.cwl [new file with mode: 0755]
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_keep_client.py
sdk/ruby/lib/arvados/collection.rb
sdk/ruby/test/test_collection.rb
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/controllers/arvados/v1/users_controller.rb
services/api/app/helpers/commits_helper.rb
services/api/app/models/api_client_authorization.rb
services/api/app/models/keep_service.rb
services/api/app/models/repository.rb
services/api/app/models/user.rb
services/api/lib/tasks/symbols.rake
services/api/test/functional/arvados/v1/users_controller_test.rb
services/api/test/integration/users_test.rb
services/keepstore/keepstore.service
tools/arvbox/bin/arvbox

index bc62407bc5173fa77b63daaa7f6f58882c29be33..40cf4a86c0befdb7a5bd892f80d201d456a69430 100644 (file)
@@ -5,7 +5,7 @@
 source 'https://rubygems.org'
 
 gem 'rails', '~> 5.0.0'
-gem 'arvados', '>= 0.1.20150511150219'
+gem 'arvados', git: 'https://github.com/curoverse/arvados.git', glob: 'sdk/ruby/arvados.gemspec'
 
 gem 'activerecord-nulldb-adapter', git: 'https://github.com/curoverse/nulldb'
 gem 'multi_json'
@@ -111,3 +111,7 @@ gem 'logstash-event'
 gem 'safe_yaml'
 
 gem 'npm-rails'
+
+# arvados-google-api-client and googleauth (and thus arvados) gems
+# depend on signet, but signet 0.12 is incompatible with ruby 2.3.
+gem 'signet', '< 0.12'
index ce328dc8954393d24b295fddaccf1b1487715a5f..b4b6100f4a533a2e5e71bddbeafc64f9b65392d7 100644 (file)
@@ -1,3 +1,17 @@
+GIT
+  remote: https://github.com/curoverse/arvados.git
+  revision: dd9f2403f43bcb93da5908ddde57d8c0491bb4c2
+  glob: sdk/ruby/arvados.gemspec
+  specs:
+    arvados (1.4.1.20191019025325)
+      activesupport (>= 3)
+      andand (~> 1.3, >= 1.3.3)
+      arvados-google-api-client (>= 0.7, < 0.8.9)
+      faraday (< 0.16)
+      i18n (~> 0)
+      json (>= 1.7.7, < 3)
+      jwt (>= 0.1.5, < 2)
+
 GIT
   remote: https://github.com/curoverse/nulldb
   revision: d8e0073b665acdd2537c5eb15178a60f02f4b413
@@ -58,14 +72,7 @@ GEM
     andand (1.3.3)
     angularjs-rails (1.3.15)
     arel (7.1.4)
-    arvados (1.3.3.20190320201707)
-      activesupport (>= 3)
-      andand (~> 1.3, >= 1.3.3)
-      arvados-google-api-client (>= 0.7, < 0.8.9)
-      i18n (~> 0)
-      json (>= 1.7.7, < 3)
-      jwt (>= 0.1.5, < 2)
-    arvados-google-api-client (0.8.7.2)
+    arvados-google-api-client (0.8.7.3)
       activesupport (>= 3.2, < 5.1)
       addressable (~> 2.3)
       autoparse (~> 0.3)
@@ -153,7 +160,7 @@ GEM
       actionpack (>= 4)
       less (~> 2.6.0)
       sprockets (>= 2)
-    libv8 (3.16.14.19-x86_64-linux)
+    libv8 (3.16.14.19)
     lograge (0.10.0)
       actionpack (>= 4)
       activesupport (>= 4)
@@ -178,7 +185,7 @@ GEM
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1.2)
       railties (> 3.1, < 6)
-    multi_json (1.13.1)
+    multi_json (1.14.1)
     multipart-post (2.1.1)
     net-scp (2.0.0)
       net-ssh (>= 2.6.5, < 6.0.0)
@@ -320,7 +327,7 @@ DEPENDENCIES
   activerecord-nulldb-adapter!
   andand
   angularjs-rails (~> 1.3.8)
-  arvados (>= 0.1.20150511150219)
+  arvados!
   bootstrap-sass (~> 3.4.1)
   bootstrap-tab-history-rails
   bootstrap-x-editable-rails
@@ -359,6 +366,7 @@ DEPENDENCIES
   sass
   sassc-rails
   selenium-webdriver (~> 3)
+  signet (< 0.12)
   simplecov (~> 0.7)
   simplecov-rcov
   sshkey
index 04572ec3cc9ebd82e5ef896d086339337771a431..fc6308678002a20969d4a44cbadf834731e067dd 100644 (file)
@@ -142,11 +142,7 @@ window.Search = {
                                 sessionKey: key,
                                 loadFunc: function(filters) {
                                     // Apply additional type dependant filters
-                                    filters = filters.concat(obj_type.filters)
-                                    var tsquery = to_tsquery(q)
-                                    if (tsquery) {
-                                        filters.push(['any', '@@', tsquery])
-                                    }
+                                    filters = filters.concat(obj_type.filters).concat(ilike_filters(q))
                                     return vnode.state.sessionDB.request(session, obj_type.api_path, {
                                         data: {
                                             filters: JSON.stringify(filters),
index e571e32db91560388167fed7c0a2725d649f678d..bf859c350a08ca794a1e7a11575d8cc73b5624f1 100644 (file)
 function updateFilterableQueryNow($target) {
     var newquery = $target.data('filterable-query-new');
     var params = $target.data('infinite-content-params-filterable') || {};
-    var tsquery = to_tsquery(newquery);
-    if (tsquery == null) {
-        params.filters = [];
-    } else {
-        params.filters = [['any', '@@', tsquery]];
-    }
+    params.filters = ilike_filters(newquery);
     $(".modal-dialog-preview-pane").html("");
     $target.data('infinite-content-params-filterable', params);
     $target.data('filterable-query', newquery);
diff --git a/apps/workbench/app/assets/javascripts/ilike_filters.js b/apps/workbench/app/assets/javascripts/ilike_filters.js
new file mode 100644 (file)
index 0000000..4f5cd48
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// ilike_filters() converts a user-entered search query to a list of
+// filters using the newly added (as of Arvados 1.5) trigram indexes. It returns
+// [] (empty list) if it can't come up with anything valid (e.g., q consists
+// entirely of punctuation).
+//
+// Examples:
+//
+// "foo"     => [["any", "ilike", "%foo%"]]
+// "foo.bar" => [["any", "ilike", "%foo.bar%"]]                         // "." is a word char in ilike queries
+// "foo/b-r" => [["any", "ilike", "%foo/b-r%"]]                         // "/" and "-", too
+// "foo_bar" => [["any", "ilike", "%foo\\_bar%"]]                       // "_" should be escaped so it can be used as a literal
+// "foo bar" => [["any", "ilike", "%foo%"], ["any", "ilike", "%bar%"]]
+// "foo|bar" => [["any", "ilike", "%foo%"], ["any", "ilike", "%bar%"]]
+// " oo|bar" => [["any", "ilike", "%oo%"], ["any", "ilike", "%bar%"]]
+// ""        => []
+// " "       => []
+// null      => []
+window.ilike_filters = function(q) {
+    q = (q || '').replace(/[^-\w\.\/]+/g, ' ').trim().replace(/_/g, '\\_')
+    if (q == '')
+        return []
+    return q.split(" ").map(function(term) {
+        return ["any", "ilike", "%"+term+"%"]
+    })
+}
\ No newline at end of file
diff --git a/apps/workbench/app/assets/javascripts/to_tsquery.js b/apps/workbench/app/assets/javascripts/to_tsquery.js
deleted file mode 100644 (file)
index f2e34d9..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-// to_tsquery() converts a user-entered search query to a useful
-// operand for the Arvados API "@@" filter. It returns null if it
-// can't come up with anything valid (e.g., q consists entirely of
-// punctuation).
-//
-// Examples:
-//
-// "foo"     => "foo:*"
-// "foo_bar" => "foo:*&bar:*"
-// "foo.bar" => "foo.bar:*"    // "." is a word char in FT queries
-// "foo/b-r" => "foo/b-r:*"    // "/" and "-", too
-// "foo|bar" => "foo:*&bar:*"
-// " oo|ba " => "oo:*&ba:*"
-// "__ "     => null
-// ""        => null
-// null      => null
-window.to_tsquery = function(q) {
-    q = (q || '').replace(/[^-\w\.\/]+/g, ' ').trim().replace(/ /g, ':*&')
-    if (q == '')
-        return null
-    return q + ':*'
-}
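
For context, the new ilike_filters() helper above replaces to_tsquery(): each word of the search query becomes a separate ["any", "ilike", "%word%"] filter instead of a single full-text "@@" operand. Below is a minimal Python sketch of the same conversion, for illustration only (it is not part of this commit; the regular expression mirrors the JavaScript above):

<pre>
import re

def ilike_filters(q):
    # Split the query into word-like terms, escape "_" so it matches
    # literally, and emit one ilike filter per term (mirrors the JS helper).
    q = re.sub(r'[^-\w./]+', ' ', q or '').strip().replace('_', r'\_')
    return [["any", "ilike", "%" + term + "%"] for term in q.split()]

ilike_filters("foo bar")   # [["any", "ilike", "%foo%"], ["any", "ilike", "%bar%"]]
ilike_filters("foo_bar")   # [["any", "ilike", "%foo\\_bar%"]]
ilike_filters("")          # []
</pre>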
index c4fad997f17afb31327a17e8a38695f22a701934..344456d1f9b445e74d86c4bd9fbd2c624804a38b 100644 (file)
@@ -150,11 +150,10 @@ navbar:
       - admin/index.html.textile.liquid
     - Configuration:
       - admin/config.html.textile.liquid
+      - admin/federation.html.textile.liquid
     - Upgrading and migrations:
       - admin/upgrading.html.textile.liquid
       - admin/config-migration.html.textile.liquid
-      - install/migrate-docker19.html.textile.liquid
-      - admin/upgrade-crunch2.html.textile.liquid
     - Users and Groups:
       - install/cheat_sheet.html.textile.liquid
       - admin/activation.html.textile.liquid
@@ -173,11 +172,12 @@ navbar:
       - admin/collection-versioning.html.textile.liquid
       - admin/collection-managed-properties.html.textile.liquid
       - admin/keep-balance.html.textile.liquid
-    - Other:
-      - admin/federation.html.textile.liquid
       - admin/controlling-container-reuse.html.textile.liquid
       - admin/logs-table-management.html.textile.liquid
+    - Other:
       - admin/troubleshooting.html.textile.liquid
+      - install/migrate-docker19.html.textile.liquid
+      - admin/upgrade-crunch2.html.textile.liquid
   installguide:
     - Overview:
       - install/index.html.textile.liquid
index d40cd3bbdc5feeb548693d60d820aa75fd3fee7b..0de058b068ea9bf4f7793b231b04f2208a56e516 100644 (file)
@@ -62,7 +62,7 @@ h2(#keepstore). keepstore
 
 The legacy keepstore config (loaded from @/etc/arvados/keepstore/keepstore.yml@ or a different location specified via -legacy-keepstore-config command line argument) takes precedence over the centralized config. After you migrate everything from the legacy config to the centralized config, you should delete @/etc/arvados/keepstore/keepstore.yml@ and stop using the -legacy-keepstore-config argument.
 
-To migrate a keepstore node's configuration, first install @arvados-server@. Run @arvados-server config-diff@, review and apply the recommended changes to @/etc/arvados/config.yml@, and run @arvados-server config-diff@ again to check for additional warnings and recommendations. When you are satisfied, delete the legacy config file, restart keepstore, and check its startup logs. Copy the updated centralized config file to your next keepstore server, and repeat the process there.
+To migrate a keepstore node's configuration, first install @arvados-server@. Run @arvados-server config-check@, review and apply the recommended changes to @/etc/arvados/config.yml@, and run @arvados-server config-check@ again to check for additional warnings and recommendations. When you are satisfied, delete the legacy config file, restart keepstore, and check its startup logs. Copy the updated centralized config file to your next keepstore server, and repeat the process there.
 
 After migrating and removing all legacy keepstore config files, make sure the @/etc/arvados/config.yml@ file is identical across all system nodes -- API server, keepstore, etc. -- and restart all services to make sure they are using the latest configuration.
 
index 3728507f9eddf7b58c45d8925a8f49a317cbcda4..b1f1506e4c7cd93b4e2538876d0930ab5c2dbd48 100644 (file)
@@ -14,28 +14,11 @@ This page describes how to enable and configure federation capabilities between
 
 An overview on how this feature works is discussed in the "architecture section":{{site.baseurl}}/architecture/federation.html
 
-h3. API Server configuration
+h2. Configuration
 
-To accept users from remote clusters, some settings need to be added to the @application.yml@ file. There are two ways in which a remote cluster can be identified: either explictly by listing its prefix-to-hostname mapping, or implicitly by assuming the given remote cluster is public and belongs to the @.arvadosapi.com@ subdomain.
+To enable a cluster to communicate with other clusters, some settings need to be added to the @config.yml@ file.  Federated clusters are identified by listing the cluster-to-hostname mapping in the @RemoteClusters@ section.
 
-For example, if you want to set up a private cluster federation, the following configuration will only allow access to users from @clsr2@ & @clsr3@:
-
-<pre>
-production:
-  remote_hosts:
-    clsr2: api.cluster2.com
-    clsr3: api.cluster3.com
-  remote_hosts_via_dns: false
-  auto_activate_users_from: []
-</pre>
-
-The additional @auto_activate_users_from@ setting can be used to allow users from the clusters in the federation to not only read but also create & update objects on the local cluster. This feature is covered in more detail in the "user activation section":{{site.baseurl}}/admin/activation.html. In the current example, only manually activated remote users would have full access to the local cluster.
-
-h3. Arvados controller & keepstores configuration
-
-Both @arvados-controller@ and @keepstore@ services also need to be configured, as they proxy requests to remote clusters when needed.
-
-Continuing the previous example, the necessary settings should be added to the @/etc/arvados/config.yml@ file as follows:
+Here is an example of the settings that should be added to the @/etc/arvados/config.yml@ file:
 
 <pre>
 Clusters:
@@ -44,14 +27,39 @@ Clusters:
       clsr2:
         Host: api.cluster2.com
         Proxy: true
+        ActivateUsers: true
       clsr3:
         Host: api.cluster3.com
         Proxy: true
+        ActivateUsers: false
 </pre>
 
 Similar settings should be added to @clsr2@ & @clsr3@ hosts, so that all clusters in the federation can talk to each other.
 
-h3. Testing
+The @ActivateUsers@ setting indicates whether users from a given cluster are activated automatically or require manual activation.  User activation is covered in more detail in the "user activation section":{{site.baseurl}}/admin/activation.html.  In the current example, users from @clsr2@ would be activated automatically, but users from @clsr3@ would require an admin to activate the account.
+
+h2(#LoginCluster). Federation user management
+
+A federation of clusters can be configured to use a separate user database per cluster, or to delegate user management to a central cluster.
+
+If clusters belong to separate organizations, each cluster will have its own user database for the members of that organization.  Through federation, a user from one organization can be granted access to the cluster of another organization.  The admin of the second cluster controls access on an individual basis by choosing to activate or deactivate accounts from other organizations (the default policy is given by @ActivateUsers@).
+
+On the other hand, if all clusters belong to the same organization, and users in that organization should have access to all the clusters, user management can be simplified by designating a @LoginCluster@ that manages the user database used by all other clusters in the federation.  To do this, choose one cluster in the federation to be the 'login cluster'.  Set the @Login.LoginCluster@ configuration value on all clusters in the federation to the cluster id of the login cluster.  After setting @LoginCluster@, restart arvados-api-server and arvados-controller.
+
+<pre>
+Clusters:
+  clsr2:
+    Login:
+      LoginCluster: clsr1
+</pre>
+
+The @LoginCluster@ configuration redirects all user logins to the login cluster, which issues API tokens that are valid on any cluster in the federation.  Users are activated or deactivated across the entire federation based on their status on the login cluster.
+
+Note: tokens issued by the login cluster need to be periodically re-validated when used on other clusters in the federation.  The period between revalidation attempts is configured with @Login.RemoteTokenRefresh@.  The default is 5 minutes.  A longer period reduces the overhead of validating tokens, but means it will take longer for other clusters to notice when a token has been revoked or a user's status has changed (activated/deactivated, admin flag changed).
+
+To migrate users of existing clusters with separate user databases to use a single LoginCluster, use "arv-federation-migrate":merge-remote-account.html .
+
+h2. Testing
 
 Following the above example, let's suppose @clsr1@ is our "home cluster", that is to say, we use our @clsr1@ user account as our federated identity and both @clsr2@ and @clsr3@ remote clusters are set up to allow users from @clsr1@ and to auto-activate them. The first thing to do would be to log into a remote workbench using the local user token. This can be done following these steps:
 
index c6188095ea90f63ce5eb35e15737c1a60583f73d..2fa7a6e41ca89c6c2bc4f9f72cf694ec8647e06f 100644 (file)
@@ -25,6 +25,23 @@ As part of migrating a user, any data or permissions associated with old user ac
 
 h2. Get user report
 
+h3. With a LoginCluster
+
+Use this method when the federation uses a centralized user database, as specified by "LoginCluster":federation.html#LoginCluster in the config file.
+
+Set the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables to those of an admin user on the cluster specified in @LoginCluster@.  The tool will automatically determine the other clusters that are listed in the federation.
+
+Next, run @arv-federation-migrate@ with the @--report@ flag:
+
+<pre>
+$ arv-federation-migrate --report users.csv
+Getting user list from x6b1s
+Getting user list from x3982
+Wrote users.csv
+</pre>
+
+h3. Without a LoginCluster
+
 The first step is to create @tokens.csv@ and list each cluster and API token to access the cluster.  API tokens must be trusted tokens with administrator access.  This is a simple comma separated value file and can be created in a text editor.  Example:
 
 _tokens.csv_
@@ -44,36 +61,45 @@ Getting user list from x3982
 Wrote users.csv
 </pre>
 
+h2. Update the user report
+
 This will produce a report of users across all clusters listed in @tokens.csv@, sorted by email address.  This file can be loaded into a text editor or spreadsheet program for ease of viewing and editing.
 
 _users.csv_
 
 <pre>
-email,user uuid,primary cluster/user
-person_a@example.com,x6b1s-tpzed-hb5n7doogwhk6cf,x6b1s
-person_b@example.com,x3982-tpzed-1vl3k7knf7qihbe,
-person_b@example.com,x6b1s-tpzed-w4nhkx2rmrhlr54,
+email,username,user uuid,primary cluster/user
+person_a@example.com,person_a,x6b1s-tpzed-hb5n7doogwhk6cf,x6b1s
+person_b@example.com,person_b,x3982-tpzed-1vl3k7knf7qihbe,
+person_b@example.com,person_b,x6b1s-tpzed-w4nhkx2rmrhlr54,
 </pre>
 
-The third column describes that user's home cluster.  If a user only has one account (identified by email address), the column will be filled in and there is nothing to do.  If the column is blank, that means there is more than one Arvados account associated with the user.  Edit the file and provide the desired home cluster for each user.  In this example, <code>person_b@example.com</code> is assigned the home cluster @x3982@.
+The fourth column describes that user's home cluster.  If a user only has one account (identified by email address), the column will be filled in and there is nothing to do.  If the column is blank, that means there is more than one Arvados account associated with the user.  Edit the file and provide the desired home cluster for each user as necessary (note: if there is a LoginCluster, all users will be migrated to the LoginCluster).  It is also possible to change the desired username for a user.  In this example, <code>person_b@example.com</code> is assigned the home cluster @x3982@.
 
 _users.csv_
 
 <pre>
-email,user uuid,primary cluster/user
-person_a@example.com,x6b1s-tpzed-hb5n7doogwhk6cf,x6b1s
-person_b@example.com,x3982-tpzed-1vl3k7knf7qihbe,x3982
-person_b@example.com,x6b1s-tpzed-w4nhkx2rmrhlr54,x3982
+email,username,user uuid,primary cluster/user
+person_a@example.com,person_a,x6b1s-tpzed-hb5n7doogwhk6cf,x6b1s
+person_b@example.com,person_b,x3982-tpzed-1vl3k7knf7qihbe,x3982
+person_b@example.com,person_b,x6b1s-tpzed-w4nhkx2rmrhlr54,x3982
 </pre>
 
 h2. Migrate users
 
 To avoid disruption, advise users to log out and avoid running workflows while performing the migration.
 
-After updating @users.csv@, use the @--migrate@ option:
+After updating @users.csv@, you can preview the migration using the @--dry-run@ option (add @--tokens tokens.csv@ if not using a LoginCluster).  This prints the actions the migration would take and reports potential problems, without making any actual changes on any cluster:
+
+<pre>
+$ arv-federation-migrate --dry-run users.csv
+(person_b@example.com) Migrating x6b1s-tpzed-w4nhkx2rmrhlr54 to x3982-tpzed-1vl3k7knf7qihbe
+</pre>
+
+Execute the migration using the @--migrate@ option (add @--tokens tokens.csv@ if not using LoginCluster):
 
 <pre>
-$ arv-federation-migrate --tokens tokens.csv --migrate users.csv
+$ arv-federation-migrate --migrate users.csv
 (person_b@example.com) Migrating x6b1s-tpzed-w4nhkx2rmrhlr54 to x3982-tpzed-1vl3k7knf7qihbe
 </pre>
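
If many rows need a home cluster filled in, users.csv can also be edited programmatically before running --dry-run or --migrate. A short Python sketch (the column layout follows the examples above; the target cluster "x3982" is only an illustration):

<pre>
import csv

# Read the report, assign a home cluster to every account that does not have
# one yet (hard-coded to "x3982" purely for illustration), and write it back.
with open("users.csv") as f:
    rows = list(csv.reader(f))
for row in rows[1:]:
    if len(row) >= 4 and not row[3]:
        row[3] = "x3982"
with open("users.csv", "w", newline="") as f:
    csv.writer(f).writerows(rows)
</pre>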
 
index 19463581f6f3cc20b44d46d551783d30b40a6b00..7d5aac47248bfa9c183e47c26f3f0e85a061dca0 100644 (file)
@@ -10,7 +10,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-The "containers" API is the recommended way to submit compute work to Arvados.  It supersedes the "jobs" API, which is deprecated.
+The "containers" API is the recommended way to submit compute work to Arvados.  It supersedes the "jobs" API, which is end-of-life as of Arvados 1.5.
 
 h2. Benefits over the "jobs" API
 
index 08dad1e3173391b6c34e16b0eac5096b2598a028..82f7a33be6ce26206184c1a536972e980d5e55a5 100644 (file)
@@ -43,15 +43,6 @@ Clusters:
         Proxy: true
 </pre>
 
-The @remote_hosts@ section of @application.yml@ (for the API server)
-
-<pre>
-production:
-  remote_hosts:
-    clsr2: api.cluster2.com
-    clsr3: api.cluster3.com
-</pre>
-
 In this example, the cluster @clsr1@ is configured to contact @api.cluster2.com@ for requests involving @clsr2@ and @api.cluster3.com@ for requests involving @clsr3@.
 
 h2(#identity). Identity
index 4e3bf6d6c937d89ccdc8cdd1781e5f3d76425965..52856c8438c6d672eb47d4ee631a913a0d59cfcd 100644 (file)
@@ -1057,5 +1057,23 @@ Clusters:
       VocabularyURL: ""
       FileViewersConfigURL: ""
 
+      # Workbench welcome screen: HTML text that will be
+      # incorporated directly onto the page.
+      WelcomePageHTML: |
+        <img src="arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
+        <h2>Please log in.</h2>
+
+        <p>The "Log in" button below will show you a sign-in
+        page. After you log in, you will be redirected back to
+        Arvados Workbench.</p>
+
+        <p>If you have never used Arvados Workbench before, logging in
+        for the first time will automatically create a new
+        account.</p>
+
+        <i>Arvados Workbench uses your name and email address only for
+        identification, and does not retrieve any other personal
+        information.</i>
+
     # Use experimental controller code (see https://dev.arvados.org/issues/14287)
     EnableBetaController14287: false
index 22eed080a2ca041103d40f157a4645a9ece85b3e..7b11e090eeee7479effd5c37b8c834798041c0a7 100644 (file)
@@ -539,7 +539,7 @@ func (ldr *Loader) loadOldKeepBalanceConfig(cfg *arvados.Config) error {
        if oc.KeepServiceTypes != nil {
                numTypes := len(*oc.KeepServiceTypes)
                if numTypes != 0 && !(numTypes == 1 && (*oc.KeepServiceTypes)[0] == "disk") {
-                       return fmt.Errorf(msg, "KeepServiceType")
+                       return fmt.Errorf(msg, "KeepServiceTypes")
                }
        }
 
index 5437836f6fee05f3aded39954ea8d626d3c12f6e..cd58868ed105a6143205b79e21a47147360307cd 100644 (file)
@@ -207,6 +207,7 @@ var whitelist = map[string]bool{
        "Workbench.UserProfileFormFields.*.*.*":        true,
        "Workbench.UserProfileFormMessage":             true,
        "Workbench.VocabularyURL":                      true,
+       "Workbench.WelcomePageHTML":                    true,
 }
 
 func redactUnsafe(m map[string]interface{}, mPrefix, lookupPrefix string) error {
index d21bb2d284b57d5dfff01a346c0708d98ca8db13..6e7aba81dfc84db1f9b8d1c807b2aa2cf1bb566a 100644 (file)
@@ -1063,6 +1063,24 @@ Clusters:
       VocabularyURL: ""
       FileViewersConfigURL: ""
 
+      # Workbench welcome screen: HTML text that will be
+      # incorporated directly onto the page.
+      WelcomePageHTML: |
+        <img src="arvados-logo-big.png" style="width: 20%; float: right; padding: 1em;" />
+        <h2>Please log in.</h2>
+
+        <p>The "Log in" button below will show you a sign-in
+        page. After you log in, you will be redirected back to
+        Arvados Workbench.</p>
+
+        <p>If you have never used Arvados Workbench before, logging in
+        for the first time will automatically create a new
+        account.</p>
+
+        <i>Arvados Workbench uses your name and email address only for
+        identification, and does not retrieve any other personal
+        information.</i>
+
     # Use experimental controller code (see https://dev.arvados.org/issues/14287)
     EnableBetaController14287: false
 `)
index d34df7f2c45918103949300ca6619027696b757f..5dc0b1e86f8f1ff66d689f19ad6ab7d7b699a3de 100644 (file)
@@ -65,21 +65,27 @@ func (s *HandlerSuite) TestConfigExport(c *check.C) {
        s.cluster.SystemRootToken = "secret"
        s.cluster.Collections.BlobSigning = true
        s.cluster.Collections.BlobSigningTTL = arvados.Duration(23 * time.Second)
-       req := httptest.NewRequest("GET", "/arvados/v1/config", nil)
-       resp := httptest.NewRecorder()
-       s.handler.ServeHTTP(resp, req)
-       c.Check(resp.Code, check.Equals, http.StatusOK)
-       c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, `*`)
-       c.Check(resp.Header().Get("Access-Control-Allow-Methods"), check.Matches, `.*\bGET\b.*`)
-       c.Check(resp.Header().Get("Access-Control-Allow-Headers"), check.Matches, `.+`)
-       var cluster arvados.Cluster
-       c.Log(resp.Body.String())
-       err := json.Unmarshal(resp.Body.Bytes(), &cluster)
-       c.Check(err, check.IsNil)
-       c.Check(cluster.ManagementToken, check.Equals, "")
-       c.Check(cluster.SystemRootToken, check.Equals, "")
-       c.Check(cluster.Collections.BlobSigning, check.DeepEquals, true)
-       c.Check(cluster.Collections.BlobSigningTTL, check.Equals, arvados.Duration(23*time.Second))
+       for _, method := range []string{"GET", "OPTIONS"} {
+               req := httptest.NewRequest(method, "/arvados/v1/config", nil)
+               resp := httptest.NewRecorder()
+               s.handler.ServeHTTP(resp, req)
+               c.Check(resp.Code, check.Equals, http.StatusOK)
+               c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, `*`)
+               c.Check(resp.Header().Get("Access-Control-Allow-Methods"), check.Matches, `.*\bGET\b.*`)
+               c.Check(resp.Header().Get("Access-Control-Allow-Headers"), check.Matches, `.+`)
+               if method == "OPTIONS" {
+                       c.Check(resp.Body.String(), check.HasLen, 0)
+                       continue
+               }
+               var cluster arvados.Cluster
+               c.Log(resp.Body.String())
+               err := json.Unmarshal(resp.Body.Bytes(), &cluster)
+               c.Check(err, check.IsNil)
+               c.Check(cluster.ManagementToken, check.Equals, "")
+               c.Check(cluster.SystemRootToken, check.Equals, "")
+               c.Check(cluster.Collections.BlobSigning, check.DeepEquals, true)
+               c.Check(cluster.Collections.BlobSigningTTL, check.Equals, arvados.Duration(23*time.Second))
+       }
 }
 
 func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
index 78f099549657538102115efc5823c8e9f672c105..205ee5018710a47749e276f73985ceb3b1b1af01 100644 (file)
@@ -61,6 +61,13 @@ func (sch *Scheduler) sync() {
                                // preparing to run a container that
                                // has already been unlocked/requeued.
                                go sch.kill(uuid, fmt.Sprintf("state=%s", ent.Container.State))
+                       } else if ent.Container.Priority == 0 {
+                               sch.logger.WithFields(logrus.Fields{
+                                       "ContainerUUID": uuid,
+                                       "State":         ent.Container.State,
+                                       "Priority":      ent.Container.Priority,
+                               }).Info("container on hold")
+                               sch.queue.Forget(uuid)
                        }
                case arvados.ContainerStateLocked:
                        if running && !exited.IsZero() && qUpdated.After(exited) {
diff --git a/lib/dispatchcloud/scheduler/sync_test.go b/lib/dispatchcloud/scheduler/sync_test.go
new file mode 100644 (file)
index 0000000..d62bd7c
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "context"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       check "gopkg.in/check.v1"
+)
+
+// Ensure the scheduler expunges containers from the queue when they
+// are no longer relevant (completed and not running, queued with
+// priority 0, etc).
+func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
+       pool := stubPool{}
+       queue := test.Queue{
+               ChooseType: chooseType,
+               Containers: []arvados.Container{
+                       {
+                               UUID:     test.ContainerUUID(1),
+                               Priority: 0,
+                               State:    arvados.ContainerStateQueued,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               UUID:     test.ContainerUUID(2),
+                               Priority: 12345,
+                               State:    arvados.ContainerStateComplete,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+               },
+       }
+       queue.Update()
+
+       ents, _ := queue.Entries()
+       c.Check(ents, check.HasLen, 1)
+
+       sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond)
+       sch.sync()
+
+       ents, _ = queue.Entries()
+       c.Check(ents, check.HasLen, 0)
+}
index 0391c5a043dd75e6d3cdaa38f6ba31c9c4dec3ca..70892cfc3a933e0addf17c6e1be899d2c0c031e4 100644 (file)
@@ -164,23 +164,26 @@ func getListenAddr(svcs arvados.Services, prog arvados.ServiceName, log logrus.F
        if !ok {
                return arvados.URL{}, fmt.Errorf("unknown service name %q", prog)
        }
+       errors := []string{}
        for url := range svc.InternalURLs {
-               if strings.HasPrefix(url.Host, "localhost:") {
-                       return url, nil
-               }
                listener, err := net.Listen("tcp", url.Host)
                if err == nil {
                        listener.Close()
                        return url, nil
                } else if strings.Contains(err.Error(), "cannot assign requested address") {
+                       // If 'Host' specifies a different server than
+                       // the current one, it'll resolve the hostname
+                       // to IP address, and then fail because it
+                       // can't bind an IP address it doesn't own.
                        continue
-               } else if strings.Contains(err.Error(), "address already in use") {
-                       return url, err
                } else {
-                       log.Warn(err)
+                       errors = append(errors, fmt.Sprintf("tried %v, got %v", url, err))
                }
        }
-       return arvados.URL{}, fmt.Errorf("configuration does not enable the %s service on this host", prog)
+       if len(errors) > 0 {
+               return arvados.URL{}, fmt.Errorf("could not enable the %q service on this host: %s", prog, strings.Join(errors, "; "))
+       }
+       return arvados.URL{}, fmt.Errorf("configuration does not enable the %q service on this host", prog)
 }
 
 type contextKeyURL struct{}
index f828e92bd6ede4f719d1c6c963b80a57905d6b5b..8c8b0a82a4a6e02eb1d140c2195af5dcaae6028c 100644 (file)
@@ -26,7 +26,7 @@ Gem::Specification.new do |s|
   s.executables << "arv"
   s.executables << "arv-tag"
   s.required_ruby_version = '>= 2.1.0'
-  s.add_runtime_dependency 'arvados', '~> 1.3.0', '>= 1.3.0'
+  s.add_runtime_dependency 'arvados', '>= 1.4.1.20190320201707'
   # Our google-api-client dependency used to be < 0.9, but that could be
   # satisfied by the buggy 0.9.pre*.  https://dev.arvados.org/issues/9213
   s.add_runtime_dependency 'arvados-google-api-client', '~> 0.6', '>= 0.6.3', '<0.8.9'
index c0e3e0de2a70fb7d542af89fa53757b3126a04d1..e12fe185a039ff509e360b2a6d29d1219ca29afe 100644 (file)
@@ -70,7 +70,7 @@ def logtail(logcollection, logfunc, header, maxlen=25):
             logname = log[:-4]
             logt = deque([], maxlen)
             mergelogs[logname] = logt
-            with logcollection.open(log) as f:
+            with logcollection.open(log, encoding="utf-8") as f:
                 for l in f:
                     if containersapi:
                         g = timestamp_re.match(l)
index eed2fe19df6a3f78a4a1f0ee40d26ccbf50f3349..406ebfd2da064df383105b8e0a7c8f4e7b19a529 100644 (file)
@@ -250,7 +250,11 @@ The 'jobs' API is no longer supported.
         activity statuses, for example in the RuntimeStatusLoggingHandler.
         """
         with self.workflow_eval_lock:
-            current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
+            current = None
+            try:
+                current = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
+            except Exception as e:
+                logger.info("Couldn't get current container: %s", e)
             if current is None:
                 return
             runtime_status = current.get('runtime_status', {})
index 9a08195a7ab4122f0f3b9a304a643f942eaa3a97..81b542057c2d780f4e2eb1a87f64bacb25ecf7bf 100644 (file)
@@ -22,6 +22,11 @@ inputs:
   insecure:
     type: boolean
     default: true
+  arvbox:
+    type: File
+    default:
+      class: File
+      location: ../../../../tools/arvbox/bin/arvbox
 outputs:
   arvados_api_token:
     type: string
@@ -32,12 +37,21 @@ outputs:
   arvados_cluster_ids:
     type: string[]
     outputSource: start/cluster_id
+  superuser_tokens:
+    type: string[]
+    outputSource: start/superuser_token
   acr:
     type: string?
     outputSource: in_acr
   arvado_api_host_insecure:
     type: boolean
     outputSource: insecure
+  arvbox_containers:
+    type: string[]
+    outputSource: containers
+  arvbox_bin:
+    type: File
+    outputSource: arvbox
 steps:
   mkdir:
     in:
@@ -49,6 +63,7 @@ steps:
     in:
       container_name: containers
       arvbox_data: mkdir/arvbox_data
+      arvbox_bin: arvbox
     out: [cluster_id, container_host, arvbox_data_out, superuser_token]
     scatter: [container_name, arvbox_data]
     scatterMethod: dotproduct
@@ -60,6 +75,7 @@ steps:
       cluster_ids: start/cluster_id
       cluster_hosts: start/container_host
       arvbox_data: start/arvbox_data_out
+      arvbox_bin: arvbox
     out: []
     scatter: [container_name, this_cluster_id, arvbox_data]
     scatterMethod: dotproduct
index 77567ee89df91f67c028ff9641ce103285d83984..76523a56befed127cebd920d9f9b594f85baafb1 100644 (file)
@@ -13,6 +13,7 @@ inputs:
   cluster_ids: string[]
   cluster_hosts: string[]
   arvbox_data: Directory
+  arvbox_bin: File
 outputs:
   arvbox_data_out:
     type: Directory
@@ -60,7 +61,7 @@ arguments:
     valueFrom: |
       docker cp cluster_config.yml.override $(inputs.container_name):/var/lib/arvados
       docker cp application.yml.override $(inputs.container_name):/usr/src/arvados/services/api/config
-      arvbox sv restart api
-      arvbox sv restart controller
-      arvbox sv restart keepstore0
-      arvbox sv restart keepstore1
+      $(inputs.arvbox_bin.path) sv restart api
+      $(inputs.arvbox_bin.path) sv restart controller
+      $(inputs.arvbox_bin.path) sv restart keepstore0
+      $(inputs.arvbox_bin.path) sv restart keepstore1
index 0fddc1b8791783b2af29f1aeea2e344b0b822c3f..a3ad6e575e6ecb2d8526855536c45c2334d5c26f 100644 (file)
@@ -31,4 +31,4 @@ inputs:
 outputs:
   test_user_uuid: string
   test_user_token: string
-arguments: [python2, $(inputs.make_user_script)]
\ No newline at end of file
+arguments: [python, $(inputs.make_user_script)]
index f69775a530000ab6b1da15c043d59ba191023957..a0b3e1864ba8f7d1418f1b396e908aee15079154 100644 (file)
@@ -10,6 +10,7 @@ $namespaces:
 inputs:
   container_name: string
   arvbox_data: Directory
+  arvbox_bin: File
 outputs:
   cluster_id:
     type: string
@@ -66,7 +67,7 @@ requirements:
 arguments:
   - shellQuote: false
     valueFrom: |
-      set -e
-      arvbox start dev
-      arvbox status > status.txt
-      arvbox cat /var/lib/arvados/superuser_token > superuser_token.txt
\ No newline at end of file
+      set -ex
+      $(inputs.arvbox_bin.path) start dev
+      $(inputs.arvbox_bin.path) status > status.txt
+      $(inputs.arvbox_bin.path) cat /var/lib/arvados/superuser_token > superuser_token.txt
index 3374e1c13f8004100c2f3c114edbfba2db26dec6..cb6cfbadbdb00dcfe56872683b5851017eeb0f8f 100644 (file)
@@ -534,6 +534,25 @@ class TestContainer(unittest.TestCase):
         except RuntimeError:
             self.fail("RuntimeStatusLoggingHandler should not be called recursively")
 
+
+    # Test to make sure that an exception raised from
+    # get_current_container doesn't cause the logger to raise an
+    # exception
+    @mock.patch("arvados_cwl.util.get_current_container")
+    def test_runtime_status_get_current_container_exception(self, gcc_mock):
+        self.setup_and_test_container_executor_and_logging(gcc_mock)
+        root_logger = logging.getLogger('')
+
+        # get_current_container is invoked when we call
+        # runtime_status_update, it is going to also raise an
+        # exception.
+        gcc_mock.side_effect = Exception("Second Error")
+        try:
+            root_logger.error("First Error")
+        except Exception:
+            self.fail("Exception in logger should not propagate")
+        self.assertTrue(gcc_mock.called)
+
     @mock.patch("arvados_cwl.ArvCwlExecutor.runtime_status_update")
     @mock.patch("arvados_cwl.util.get_current_container")
     @mock.patch("arvados.collection.CollectionReader")
index 7c1c3538094869ff82a510226575a2dbbd0491ab..638c56fb95d16b28f57216dca86145b77353319c 100644 (file)
@@ -207,6 +207,7 @@ type Cluster struct {
                }
                UserProfileFormMessage string
                VocabularyURL          string
+               WelcomePageHTML        string
        }
 
        EnableBetaController14287 bool
index 23e6e016d303bbc78abefcc39bfa0fb65b8ef0fe..8889453125c4753a62927f830c4e236ecbc272a6 100644 (file)
@@ -76,8 +76,9 @@ func (h *limiterHandler) Max() int {
 func (h *limiterHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        if cap(h.requests) == 0 {
                atomic.AddInt64(&h.count, 1)
+               defer atomic.AddInt64(&h.count, -1)
                h.handler.ServeHTTP(resp, req)
-               atomic.AddInt64(&h.count, -1)
+               return
        }
        select {
        case h.requests <- struct{}{}:
index 62936e71831fb1fa055b213fac470f2adeb5ea16..02c788bd9ae2196812bd856a2894855c372ca9df 100644 (file)
@@ -27,8 +27,9 @@ func RefreshServiceDiscovery() {
        defer svcListCacheMtx.Unlock()
        for _, ent := range svcListCache {
                wg.Add(1)
+               clear := ent.clear
                go func() {
-                       ent.clear <- struct{}{}
+                       clear <- struct{}{}
                        wg.Done()
                }()
        }
index 37666eb8e8b8f7e2d8f4cbbdf76ff7bda56b003b..6893b94bf78b7b16a1da1a802fdabe419563eb0d 100644 (file)
@@ -481,7 +481,7 @@ class _BlockManager(object):
     DEFAULT_PUT_THREADS = 2
     DEFAULT_GET_THREADS = 2
 
-    def __init__(self, keep, copies=None, put_threads=None):
+    def __init__(self, keep, copies=None, put_threads=None, num_retries=None):
         """keep: KeepClient object to use"""
         self._keep = keep
         self._bufferblocks = collections.OrderedDict()
@@ -500,6 +500,7 @@ class _BlockManager(object):
         self._pending_write_size = 0
         self.threads_lock = threading.Lock()
         self.padding_block = None
+        self.num_retries = num_retries
 
     @synchronized
     def alloc_bufferblock(self, blockid=None, starting_capacity=2**14, owner=None):
@@ -554,9 +555,9 @@ class _BlockManager(object):
                     return
 
                 if self.copies is None:
-                    loc = self._keep.put(bufferblock.buffer_view[0:bufferblock.write_pointer].tobytes())
+                    loc = self._keep.put(bufferblock.buffer_view[0:bufferblock.write_pointer].tobytes(), num_retries=self.num_retries)
                 else:
-                    loc = self._keep.put(bufferblock.buffer_view[0:bufferblock.write_pointer].tobytes(), copies=self.copies)
+                    loc = self._keep.put(bufferblock.buffer_view[0:bufferblock.write_pointer].tobytes(), num_retries=self.num_retries, copies=self.copies)
                 bufferblock.set_state(_BufferBlock.COMMITTED, loc)
             except Exception as e:
                 bufferblock.set_state(_BufferBlock.ERROR, e)
@@ -725,9 +726,9 @@ class _BlockManager(object):
         if sync:
             try:
                 if self.copies is None:
-                    loc = self._keep.put(block.buffer_view[0:block.write_pointer].tobytes())
+                    loc = self._keep.put(block.buffer_view[0:block.write_pointer].tobytes(), num_retries=self.num_retries)
                 else:
-                    loc = self._keep.put(block.buffer_view[0:block.write_pointer].tobytes(), copies=self.copies)
+                    loc = self._keep.put(block.buffer_view[0:block.write_pointer].tobytes(), num_retries=self.num_retries, copies=self.copies)
                 block.set_state(_BufferBlock.COMMITTED, loc)
             except Exception as e:
                 block.set_state(_BufferBlock.ERROR, e)
index cf1a36f9fdfbbfdf739fe75027d00eaa782df4f2..26902931582244142054d69bc2e52fe596927de3 100644 (file)
@@ -1410,7 +1410,7 @@ class Collection(RichCollectionBase):
             copies = (self.replication_desired or
                       self._my_api()._rootDesc.get('defaultCollectionReplication',
                                                    2))
-            self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads)
+            self._block_manager = _BlockManager(self._my_keep(), copies=copies, put_threads=self.put_threads, num_retries=self.num_retries)
         return self._block_manager
 
     def _remember_api_response(self, response):
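
The changes above plumb the collection's retry count into _BlockManager so that the keep.put() calls it issues while committing buffer blocks honor the caller's retry setting. A minimal usage sketch with the Python SDK (num_retries=5 and the file name are arbitrary example values):

<pre>
import arvados

# num_retries set on the Collection is now forwarded to its block manager and
# on to each KeepClient.put() used to commit buffer blocks.
c = arvados.collection.Collection(num_retries=5)
with c.open("example.txt", "w") as f:
    f.write("hello")
c.save_new(name="num_retries example")
</pre>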
index 1daf6beb7caa677578f5400523d5d16c08b911f6..885d6fda03560fccb735216ff10b6c730dd1c1c2 100755 (executable)
@@ -3,6 +3,15 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+#
+# Migration tool for merging user accounts belonging to the same user
+# but on separate clusters to use a single user account managed by a
+# specific cluster.
+#
+# If you're working on this, see
+# arvados/sdk/python/tests/fed-migrate/README for information about
+# the testing infrastructure.
+
 import arvados
 import arvados.util
 import arvados.errors
@@ -11,51 +20,289 @@ import sys
 import argparse
 import hmac
 import urllib.parse
+import os
+import hashlib
+from arvados._version import __version__
 
-def main():
-
-    parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
-    parser.add_argument('--tokens', type=str, required=True)
-    group = parser.add_mutually_exclusive_group(required=True)
-    group.add_argument('--report', type=str, help="Generate report .csv file listing users by email address and their associated Arvados accounts")
-    group.add_argument('--migrate', type=str, help="Consume report .csv and migrate users to designated Arvados accounts")
-    group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected")
-    args = parser.parse_args()
+EMAIL=0
+USERNAME=1
+UUID=2
+HOMECLUSTER=3
 
+def connect_clusters(args):
     clusters = {}
     errors = []
-    print("Reading %s" % args.tokens)
-    with open(args.tokens, "rt") as f:
+    loginCluster = None
+    if args.tokens:
+        print("Reading %s" % args.tokens)
+        with open(args.tokens, "rt") as f:
+            for r in csv.reader(f):
+                if len(r) != 2:
+                    continue
+                host = r[0]
+                token = r[1]
+                print("Contacting %s" % (host))
+                arv = arvados.api(host=host, token=token, cache=False)
+                clusters[arv._rootDesc["uuidPrefix"]] = arv
+    else:
+        arv = arvados.api(cache=False)
+        rh = arv._rootDesc["remoteHosts"]
+        tok = arv.api_client_authorizations().current().execute()
+        token = "v2/%s/%s" % (tok["uuid"], tok["api_token"])
+
+        for k,v in rh.items():
+            arv = arvados.api(host=v, token=token, cache=False, insecure=os.environ.get("ARVADOS_API_HOST_INSECURE"))
+            clusters[k] = arv
+
+    for _, arv in clusters.items():
+        config = arv.configs().get().execute()
+        if config["Login"]["LoginCluster"] != "" and loginCluster is None:
+            loginCluster = config["Login"]["LoginCluster"]
+
+    print("Checking that the federation is well connected")
+    for arv in clusters.values():
+        config = arv.configs().get().execute()
+        if loginCluster and config["Login"]["LoginCluster"] != loginCluster and config["ClusterID"] != loginCluster:
+            errors.append("Inconsistent login cluster configuration, expected '%s' on %s but was '%s'" % (loginCluster, config["ClusterID"], config["Login"]["LoginCluster"]))
+            continue
+
+        if arv._rootDesc["revision"] < "20190926":
+            errors.append("Arvados API server revision on cluster '%s' is too old, must be updated to at least Arvados 1.5 before running migration." % config["ClusterID"])
+            continue
+
+        try:
+            cur = arv.users().current().execute()
+        except arvados.errors.ApiError as e:
+            errors.append("checking token for %s   %s" % (arv._rootDesc["rootUrl"], e))
+            continue
+
+        if not cur["is_admin"]:
+            errors.append("User %s is not admin on %s" % (cur["uuid"], arv._rootDesc["uuidPrefix"]))
+            continue
+
+        for r in clusters:
+            if r != arv._rootDesc["uuidPrefix"] and r not in arv._rootDesc["remoteHosts"]:
+                errors.append("%s is missing from remoteHosts of %s" % (r, arv._rootDesc["uuidPrefix"]))
+        for r in arv._rootDesc["remoteHosts"]:
+            if r != "*" and r not in clusters:
+                print("WARNING: %s is federated with %s but %s is missing from the tokens file or the token is invalid" % (arv._rootDesc["uuidPrefix"], r, r))
+
+    return clusters, errors, loginCluster
+
+
+def fetch_users(clusters, loginCluster):
+    rows = []
+    by_email = {}
+    by_username = {}
+
+    users = []
+    for c, arv in clusters.items():
+        print("Getting user list from %s" % c)
+        ul = arvados.util.list_all(arv.users().list)
+        for l in ul:
+            if l["uuid"].startswith(c):
+                users.append(l)
+
+    # Users list is sorted by email
+    # Go through users and collect users with same email
+    # when we see a different email (or get to the end)
+    # call add_accum_rows() to generate the report rows with
+    # the "home cluster" set, and also fill in the by_email table.
+
+    users = sorted(users, key=lambda u: u["email"]+"::"+(u["username"] or "")+"::"+u["uuid"])
+
+    accum = []
+    lastemail = None
+
+    def add_accum_rows():
+        homeuuid = None
+        for a in accum:
+            uuids = set(a["uuid"] for a in accum)
+            homeuuid = ((len(uuids) == 1) and uuids.pop()) or ""
+        for a in accum:
+            r = (a["email"], a["username"], a["uuid"], loginCluster or homeuuid[0:5])
+            by_email.setdefault(a["email"], {})
+            by_email[a["email"]][a["uuid"]] = r
+            homeuuid_and_username = "%s::%s" % (r[HOMECLUSTER], a["username"])
+            if homeuuid_and_username not in by_username:
+                by_username[homeuuid_and_username] = a["email"]
+            elif by_username[homeuuid_and_username] != a["email"]:
+                print("ERROR: the username '%s' is listed for both '%s' and '%s' on cluster '%s'" % (r[USERNAME], r[EMAIL], by_username[homeuuid_and_username], r[HOMECLUSTER]))
+                exit(1)
+            rows.append(r)
+
+    for u in users:
+        if u["uuid"].endswith("-anonymouspublic") or u["uuid"].endswith("-000000000000000"):
+            continue
+        if lastemail == None:
+            lastemail = u["email"]
+        if u["email"] == lastemail:
+            accum.append(u)
+        else:
+            add_accum_rows()
+            lastemail = u["email"]
+            accum = [u]
+
+    add_accum_rows()
+
+    return rows, by_email, by_username
+
+
+def read_migrations(args, by_email, by_username):
+    rows = []
+    with open(args.migrate or args.dry_run, "rt") as f:
         for r in csv.reader(f):
-            host = r[0]
-            token = r[1]
-            print("Contacting %s" % (host))
-            arv = arvados.api(host=host, token=token, cache=False)
+            if r[EMAIL] == "email":
+                continue
+            by_email.setdefault(r[EMAIL], {})
+            by_email[r[EMAIL]][r[UUID]] = r
+
+            homeuuid_and_username = "%s::%s" % (r[HOMECLUSTER], r[USERNAME])
+            if homeuuid_and_username not in by_username:
+                by_username[homeuuid_and_username] = r[EMAIL]
+            elif by_username[homeuuid_and_username] != r[EMAIL]:
+                print("ERROR: the username '%s' is listed for both '%s' and '%s' on cluster '%s'" % (r[USERNAME], r[EMAIL], by_username[homeuuid_and_username], r[HOMECLUSTER]))
+                exit(1)
+
+            rows.append(r)
+    return rows
+
+def update_username(args, email, user_uuid, username, migratecluster, migratearv):
+    print("(%s) Updating username of %s to '%s' on %s" % (email, user_uuid, username, migratecluster))
+    if not args.dry_run:
+        try:
+            conflicts = migratearv.users().list(filters=[["username", "=", username]]).execute()
+            if conflicts["items"]:
+                migratearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute()
+            migratearv.users().update(uuid=user_uuid, body={"user": {"username": username}}).execute()
+        except arvados.errors.ApiError as e:
+            print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e))
+
+
+def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters):
+    candidates = []
+    conflict = False
+    for b in by_email[email].values():
+        if b[2].startswith(userhome):
+            candidates.append(b)
+        if b[1] != username and b[3] == userhome:
+            print("(%s) Cannot migrate %s, conflicting usernames %s and %s" % (email, old_user_uuid, b[1], username))
+            conflict = True
+            break
+    if conflict:
+        return None
+    if len(candidates) == 0:
+        if len(userhome) == 5 and userhome not in clusters:
+            print("(%s) Cannot migrate %s, unknown home cluster %s (typo?)" % (email, old_user_uuid, userhome))
+            return None
+        print("(%s) No user listed with same email to migrate %s to %s, will create new user with username '%s'" % (email, old_user_uuid, userhome, username))
+        if not args.dry_run:
+            newhomecluster = userhome[0:5]
+            homearv = clusters[userhome]
+            user = None
             try:
-                cur = arv.users().current().execute()
-                arv.api_client_authorizations().list(limit=1).execute()
+                conflicts = homearv.users().list(filters=[["username", "=", username]]).execute()
+                if conflicts["items"]:
+                    homearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute()
+                user = homearv.users().create(body={"user": {"email": email, "username": username}}).execute()
             except arvados.errors.ApiError as e:
-                errors.append("checking token for %s: %s" % (host, e))
-                errors.append('    This script requires a token issued to a trusted client in order to manipulate access tokens.')
-                errors.append('    See "Trusted client setting" in https://doc.arvados.org/install/install-workbench-app.html')
-                errors.append('    and https://doc.arvados.org/api/tokens.html')
-                continue
+                print("(%s) Could not create user: %s" % (email, str(e)))
+                return None
 
-            if not cur["is_admin"]:
-                errors.append("Not admin of %s" % host)
-                continue
+            tup = (email, username, user["uuid"], userhome)
+        else:
+            # dry run
+            tup = (email, username, "%s-tpzed-xfakexfakexfake" % (userhome[0:5]), userhome)
+        by_email[email][tup[2]] = tup
+        candidates.append(tup)
+    if len(candidates) > 1:
+        print("(%s) Multiple users listed to migrate %s to %s, use full uuid" % (email, old_user_uuid, userhome))
+        return None
+    return candidates[0][2]
 
-            clusters[arv._rootDesc["uuidPrefix"]] = arv
 
+def activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_user_uuid):
+    # create a token for the new user and salt it for the
+    # migration cluster, then use it to access the migration
+    # cluster as the new user once before merging to ensure
+    # the new user is known on that cluster.
+    migratecluster = migratearv._rootDesc["uuidPrefix"]
+    try:
+        if not args.dry_run:
+            newtok = homearv.api_client_authorizations().create(body={
+                "api_client_authorization": {'owner_uuid': new_user_uuid}}).execute()
+        else:
+            newtok = {"uuid": "dry-run", "api_token": "12345"}
+    except arvados.errors.ApiError as e:
+        print("(%s) Could not create API token for %s: %s" % (email, new_user_uuid, e))
+        return None
+
+    try:
+        olduser = migratearv.users().get(uuid=old_user_uuid).execute()
+    except arvados.errors.ApiError as e:
+        if e.resp.status != 404:
+            print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e))
+        return None
+
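+    # Salt the new token for the migration cluster: the secret part becomes
+    # HMAC-SHA1(key=original secret, msg=cluster id), giving "v2/<uuid>/<hexdigest>".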
+    salted = 'v2/' + newtok["uuid"] + '/' + hmac.new(newtok["api_token"].encode(),
+                                                     msg=migratecluster.encode(),
+                                                     digestmod=hashlib.sha1).hexdigest()
+    try:
+        ru = urllib.parse.urlparse(migratearv._rootDesc["rootUrl"])
+        if not args.dry_run:
+            newuser = arvados.api(host=ru.netloc, token=salted, insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute()
+        else:
+            newuser = {"is_active": True, "username": username}
+    except arvados.errors.ApiError as e:
+        print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e))
+        return None
+
+    if not newuser["is_active"] and olduser["is_active"]:
+        print("(%s) Activating user %s on %s" % (email, new_user_uuid, migratecluster))
+        try:
+            if not args.dry_run:
+                migratearv.users().update(uuid=new_user_uuid, body={"is_active": True}).execute()
+        except arvados.errors.ApiError as e:
+            print("(%s) Could not activate user %s on %s: %s" % (email, new_user_uuid, migratecluster, e))
+            return None
+
+    if olduser["is_admin"] and not newuser["is_admin"]:
+        print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
+        return None
+
+    return newuser
+
+def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid):
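+    # Create a "Migrated from ..." project owned by the new user, then merge the
+    # old user into the new one, moving the old user's objects into that project
+    # and redirecting future requests to the new user.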
+    try:
+        if not args.dry_run:
+            grp = migratearv.groups().create(body={
+                "owner_uuid": new_user_uuid,
+                "name": "Migrated from %s (%s)" % (email, old_user_uuid),
+                "group_class": "project"
+            }, ensure_unique_name=True).execute()
+            migratearv.users().merge(old_user_uuid=old_user_uuid,
+                                     new_user_uuid=new_user_uuid,
+                                     new_owner_uuid=grp["uuid"],
+                                     redirect_to_new_user=True).execute()
+    except arvados.errors.ApiError as e:
+        print("(%s) Error migrating user: %s" % (email, e))
 
-    print("Checking that the federation is well connected")
-    for v in clusters.values():
-        for r in clusters:
-            if r != v._rootDesc["uuidPrefix"] and r not in v._rootDesc["remoteHosts"]:
-                errors.append("%s is missing from remoteHosts of %s" % (r, v._rootDesc["uuidPrefix"]))
-        for r in v._rootDesc["remoteHosts"]:
-            if r != "*" and r not in clusters:
-                print("WARNING: %s is federated with %s but %s is missing from the tokens file or the token is invalid" % (v._rootDesc["uuidPrefix"], r, r))
+
+def main():
+
+    parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
+    parser.add_argument(
+        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
+        help='Print version and exit.')
+    parser.add_argument('--tokens', type=str, required=False)
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument('--report', type=str, help="Generate report .csv file listing users by email address and their associated Arvados accounts")
+    group.add_argument('--migrate', type=str, help="Consume report .csv and migrate users to designated Arvados accounts")
+    group.add_argument('--dry-run', type=str, help="Consume report .csv and report how users would be migrated to designated Arvados accounts")
+    group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected")
+    args = parser.parse_args()
+
+    clusters, errors, loginCluster = connect_clusters(args)
 
     if errors:
         for e in errors:
@@ -66,148 +313,59 @@ def main():
         print("Tokens file passed checks")
         exit(0)
 
-    if args.report:
-        users = []
-        for c, arv in clusters.items():
-            print("Getting user list from %s" % c)
-            ul = arvados.util.list_all(arv.users().list)
-            for l in ul:
-                if l["uuid"].startswith(c):
-                    users.append(l)
+    rows, by_email, by_username = fetch_users(clusters, loginCluster)
 
+    if args.report:
         out = csv.writer(open(args.report, "wt"))
+        out.writerow(("email", "username", "user uuid", "home cluster"))
+        for r in rows:
+            out.writerow(r)
+        print("Wrote %s" % args.report)
+        return
 
-        out.writerow(("email", "user uuid", "primary cluster/user"))
-
-        users = sorted(users, key=lambda u: u["email"]+"::"+u["uuid"])
+    if args.migrate or args.dry_run:
+        if args.dry_run:
+            print("Performing dry run")
 
-        accum = []
-        lastemail = None
-        for u in users:
-            if u["uuid"].endswith("-anonymouspublic") or u["uuid"].endswith("-000000000000000"):
-                continue
-            if lastemail == None:
-                lastemail = u["email"]
-            if u["email"] == lastemail:
-                accum.append(u)
-            else:
-                homeuuid = None
-                for a in accum:
-                    if homeuuid is None:
-                        homeuuid = a["uuid"]
-                    if a["uuid"] != homeuuid:
-                        homeuuid = ""
-                for a in accum:
-                    out.writerow((a["email"], a["uuid"], homeuuid[0:5]))
-                lastemail = u["email"]
-                accum = [u]
+        rows = read_migrations(args, by_email, by_username)
 
-        homeuuid = None
-        for a in accum:
-            if homeuuid is None:
-                homeuuid = a["uuid"]
-            if a["uuid"] != homeuuid:
-                homeuuid = ""
-        for a in accum:
-            out.writerow((a["email"], a["uuid"], homeuuid[0:5]))
-
-        print("Wrote %s" % args.report)
-
-    if args.migrate:
-        rows = []
-        by_email = {}
-        with open(args.migrate, "rt") as f:
-            for r in csv.reader(f):
-                if r[0] == "email":
-                    continue
-                by_email.setdefault(r[0], [])
-                by_email[r[0]].append(r)
-                rows.append(r)
         for r in rows:
-            email = r[0]
-            old_user_uuid = r[1]
-            userhome = r[2]
+            email = r[EMAIL]
+            username = r[USERNAME]
+            old_user_uuid = r[UUID]
+            userhome = r[HOMECLUSTER]
 
             if userhome == "":
                 print("(%s) Skipping %s, no home cluster specified" % (email, old_user_uuid))
             if old_user_uuid.startswith(userhome):
+                migratecluster = old_user_uuid[0:5]
+                migratearv = clusters[migratecluster]
+                if migratearv.users().get(uuid=old_user_uuid).execute()["username"] != username:
+                    update_username(args, email, old_user_uuid, username, migratecluster, migratearv)
                 continue
-            candidates = []
-            for b in by_email[email]:
-                if b[1].startswith(userhome):
-                    candidates.append(b)
-            if len(candidates) == 0:
-                if len(userhome) == 5 and userhome not in clusters:
-                    print("(%s) Cannot migrate %s, unknown home cluster %s (typo?)" % (email, old_user_uuid, userhome))
-                else:
-                    print("(%s) No user listed with same email to migrate %s to %s" % (email, old_user_uuid, userhome))
-                continue
-            if len(candidates) > 1:
-                print("(%s) Multiple users listed to migrate %s to %s, use full uuid" % (email, old_user_uuid, userhome))
-                continue
-            new_user_uuid = candidates[0][1]
-
-            # cluster where the migration is happening
-            migratecluster = old_user_uuid[0:5]
-            migratearv = clusters[migratecluster]
-
-            # the user's new home cluster
-            newhomecluster = userhome[0:5]
-            homearv = clusters[newhomecluster]
 
-            # create a token for the new user and salt it for the
-            # migration cluster, then use it to access the migration
-            # cluster as the new user once before merging to ensure
-            # the new user is known on that cluster.
-            try:
-                newtok = homearv.api_client_authorizations().create(body={
-                    "api_client_authorization": {'owner_uuid': new_user_uuid}}).execute()
-            except arvados.errors.ApiError as e:
-                print("(%s) Could not create API token for %s: %s" % (email, new_user_uuid, e))
+            new_user_uuid = choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters)
+            if new_user_uuid is None:
                 continue
 
-            salted = 'v2/' + newtok["uuid"] + '/' + hmac.new(newtok["api_token"].encode(),
-                                                             msg=migratecluster.encode(),
-                                                             digestmod='sha1').hexdigest()
-            try:
-                ru = urllib.parse.urlparse(migratearv._rootDesc["rootUrl"])
-                newuser = arvados.api(host=ru.netloc, token=salted).users().current().execute()
-            except arvados.errors.ApiError as e:
-                print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e))
-                continue
+            # attempt the migration on each cluster in the federation where the old user exists
+            for migratecluster in clusters:
+                migratearv = clusters[migratecluster]
 
-            try:
-                olduser = migratearv.users().get(uuid=old_user_uuid).execute()
-            except arvados.errors.ApiError as e:
-                print("(%s) Could not retrieve user %s from %s, user may have already been migrated: %s" % (email, old_user_uuid, migratecluster, e))
-                continue
+                # the user's new home cluster
+                newhomecluster = userhome[0:5]
+                homearv = clusters[newhomecluster]
 
-            if not newuser["is_active"]:
-                print("(%s) Activating user %s on %s" % (email, new_user_uuid, migratecluster))
-                try:
-                    migratearv.users().update(uuid=new_user_uuid, body={"is_active": True}).execute()
-                except arvados.errors.ApiError as e:
-                    print("(%s) Could not activate user %s on %s: %s" % (email, new_user_uuid, migratecluster, e))
+                newuser = activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_user_uuid)
+                if newuser is None:
                     continue
 
-            if olduser["is_admin"] and not newuser["is_admin"]:
-                print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
-                continue
+                print("(%s) Migrating %s to %s on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
 
-            print("(%s) Migrating %s to %s on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
+                migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid)
 
-            try:
-                grp = migratearv.groups().create(body={
-                    "owner_uuid": new_user_uuid,
-                    "name": "Migrated from %s (%s)" % (email, old_user_uuid),
-                    "group_class": "project"
-                }, ensure_unique_name=True).execute()
-                migratearv.users().merge(old_user_uuid=old_user_uuid,
-                                         new_user_uuid=new_user_uuid,
-                                         new_owner_uuid=grp["uuid"],
-                                         redirect_to_new_user=True).execute()
-            except arvados.errors.ApiError as e:
-                print("(%s) Error migrating user: %s" % (email, e))
+                if newuser['username'] != username:
+                    update_username(args, email, new_user_uuid, username, migratecluster, migratearv)
 
 if __name__ == "__main__":
     main()
index 5773cb4f98792354c671a3cfb3ecb90f7f92f0f9..616a94e903eba20794ac589e696b5eb7c14f50aa 100644 (file)
@@ -867,7 +867,9 @@ class ArvPutUploadJob(object):
                                           update_collection):
             try:
                 self._remote_collection = arvados.collection.Collection(
-                    update_collection, api_client=self._api_client)
+                    update_collection,
+                    api_client=self._api_client,
+                    num_retries=self.num_retries)
             except arvados.errors.ApiError as error:
                 raise CollectionUpdateError("Cannot read collection {} ({})".format(update_collection, error))
             else:
@@ -910,7 +912,8 @@ class ArvPutUploadJob(object):
                 self._state['manifest'],
                 replication_desired=self.replication_desired,
                 put_threads=self.put_threads,
-                api_client=self._api_client)
+                api_client=self._api_client,
+                num_retries=self.num_retries)
 
     def _cached_manifest_valid(self):
         """
index 4354ced67d3b4a3b678be3cacaa575f58f4f6d3f..86a28f54c402c8d44aba1d8511faab18e5e8b44a 100644 (file)
@@ -1117,7 +1117,7 @@ class KeepClient(object):
                 "{} not found".format(loc_s), service_errors)
         else:
             raise arvados.errors.KeepReadError(
-                "failed to read {}".format(loc_s), service_errors, label="service")
+                "failed to read {} after {}".format(loc_s, loop.attempts_str()), service_errors, label="service")
 
     @retry.retry_method
     def put(self, data, copies=2, num_retries=None, request_id=None):
@@ -1196,8 +1196,8 @@ class KeepClient(object):
                               for key in sorted_roots
                               if roots_map[key].last_result()['error'])
             raise arvados.errors.KeepWriteError(
-                "failed to write {} (wanted {} copies but wrote {})".format(
-                    data_hash, copies, writer_pool.done()), service_errors, label="service")
+                "failed to write {} after {} (wanted {} copies but wrote {})".format(
+                    data_hash, loop.attempts_str(), copies, writer_pool.done()), service_errors, label="service")
 
     def local_store_put(self, data, copies=1, num_retries=None):
         """A stub for put().
index 3f62ab779f81fa43537a8223b7348bed52dc3a7c..ea4095930fc78f7cbbb26c49f45a8fa66fbb4081 100644 (file)
@@ -64,6 +64,7 @@ class RetryLoop(object):
         self.max_wait = max_wait
         self.next_start_time = 0
         self.results = deque(maxlen=save_results)
+        self._attempts = 0
         self._running = None
         self._success = None
 
@@ -101,6 +102,7 @@ class RetryLoop(object):
                 "recorded a loop result after the loop finished")
         self.results.append(result)
         self._success = self.check_result(result)
+        self._attempts += 1
 
     def success(self):
         """Return the loop's end state.
@@ -118,6 +120,19 @@ class RetryLoop(object):
             raise arvados.errors.AssertionError(
                 "queried loop results before any were recorded")
 
+    def attempts(self):
+        """Return the number of attempts that have been made.
+
+        Includes successes and failures."""
+        return self._attempts
+
+    def attempts_str(self):
+        """Human-readable attempts(): 'N attempts' or '1 attempt'"""
+        if self._attempts == 1:
+            return '1 attempt'
+        else:
+            return '{} attempts'.format(self._attempts)
+
 
 def check_http_response_success(status_code):
     """Convert an HTTP status code to a loop control flag.
diff --git a/sdk/python/tests/fed-migrate/README b/sdk/python/tests/fed-migrate/README
new file mode 100644 (file)
index 0000000..83d659d
--- /dev/null
@@ -0,0 +1,16 @@
+To run the federation migration integration test:
+
+arvbox must be in $PATH.
+
+arv-federation-migrate must either be in $PATH or have its full path supplied
+via the 'fed_migrate' input parameter.
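+
+For example, adding the following line to the fed.json file generated below
+will point the test at a specific binary (the path is only a placeholder):
+
+  "fed_migrate": "/path/to/arvados/sdk/python/bin/arv-federation-migrate"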
+
+# Create arvbox containers fedbox(1,2,3) for the federation
+$ cwltool --enable-ext arvbox-make-federation.cwl --arvbox_base ~/.arvbox > fed.json
+
+# Configure containers and run tests
+$ cwltool fed-migrate.cwl fed.json
+
+The CWL for running the test is generated from the .cwlex sources using cwl-ex:
+
+https://github.com/common-workflow-language/cwl-ex/
diff --git a/sdk/python/tests/fed-migrate/arvbox-make-federation.cwl b/sdk/python/tests/fed-migrate/arvbox-make-federation.cwl
new file mode 100644 (file)
index 0000000..5057d4c
--- /dev/null
@@ -0,0 +1,33 @@
+cwlVersion: v1.0
+class: Workflow
+$namespaces:
+  arv: "http://arvados.org/cwl#"
+  cwltool: "http://commonwl.org/cwltool#"
+inputs:
+  arvbox_base: Directory
+outputs:
+  arvados_api_hosts:
+    type: string[]
+    outputSource: start/arvados_api_hosts
+  arvados_cluster_ids:
+    type: string[]
+    outputSource: start/arvados_cluster_ids
+  superuser_tokens:
+    type: string[]
+    outputSource: start/superuser_tokens
+  arvbox_containers:
+    type: string[]
+    outputSource: start/arvbox_containers
+  arvbox_bin:
+    type: File
+    outputSource: start/arvbox_bin
+requirements:
+  SubworkflowFeatureRequirement: {}
+  cwltool:LoadListingRequirement:
+    loadListing: no_listing
+steps:
+  start:
+    in:
+      arvbox_base: arvbox_base
+    out: [arvados_api_hosts, arvados_cluster_ids, arvado_api_host_insecure, superuser_tokens, arvbox_containers, arvbox_bin]
+    run: ../../../cwl/tests/federation/arvbox-make-federation.cwl
diff --git a/sdk/python/tests/fed-migrate/check.py b/sdk/python/tests/fed-migrate/check.py
new file mode 100644 (file)
index 0000000..8f494be
--- /dev/null
@@ -0,0 +1,61 @@
+import arvados
+import json
+import sys
+
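+# Verify the expected post-migration state of the three-cluster test federation
+# set up by create_users.py: each "caseN" account should resolve to a single
+# canonical user uuid, and each cluster should only list users that have
+# actually accessed it.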
+j = json.load(open(sys.argv[1]))
+
+apiA = arvados.api(host=j["arvados_api_hosts"][0], token=j["superuser_tokens"][0], insecure=True)
+apiB = arvados.api(host=j["arvados_api_hosts"][1], token=j["superuser_tokens"][1], insecure=True)
+apiC = arvados.api(host=j["arvados_api_hosts"][2], token=j["superuser_tokens"][2], insecure=True)
+
+users = apiA.users().list().execute()
+
+assert len(users["items"]) == 11
+
+by_username = {}
+
+for i in range(1, 10):
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i):
+            found = True
+            by_username[u["username"]] = u["uuid"]
+    assert found
+
+found = False
+for u in users["items"]:
+    if (u["username"] == "case9" and u["email"] == "case9@test" and
+        u["uuid"] == by_username[u["username"]] and u["is_active"] is False):
+        found = True
+assert found
+
+users = apiB.users().list().execute()
+assert len(users["items"]) == 11
+
+for i in range(2, 10):
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["uuid"] == by_username[u["username"]]:
+            found = True
+    assert found
+
+users = apiC.users().list().execute()
+assert len(users["items"]) == 8
+
+for i in (2, 4, 6, 7, 8):
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["uuid"] == by_username[u["username"]]:
+            found = True
+    assert found
+
+# cases 3, 5, 9 involve users that have never accessed cluster C so
+# there's nothing to migrate.
+for i in (3, 5, 9):
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["uuid"] == by_username[u["username"]]:
+            found = True
+    assert not found
+
+print("Passed checks")
diff --git a/sdk/python/tests/fed-migrate/create_users.py b/sdk/python/tests/fed-migrate/create_users.py
new file mode 100644 (file)
index 0000000..cea624e
--- /dev/null
@@ -0,0 +1,88 @@
+import arvados
+import json
+import sys
+
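+# Populate the arvbox test federation (clusters A, B, C) with the user accounts
+# exercised by the migration test cases checked in check.py.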
+j = json.load(open(sys.argv[1]))
+
+apiA = arvados.api(host=j["arvados_api_hosts"][0], token=j["superuser_tokens"][0], insecure=True)
+apiB = arvados.api(host=j["arvados_api_hosts"][1], token=j["superuser_tokens"][1], insecure=True)
+apiC = arvados.api(host=j["arvados_api_hosts"][2], token=j["superuser_tokens"][2], insecure=True)
+
+def maketoken(newtok):
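+    # Format an api_client_authorization record as a "v2/<uuid>/<secret>" token
+    # so it can be used against the other clusters in the federation.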
+    return 'v2/' + newtok["uuid"] + '/' + newtok["api_token"]
+
+# case 1
+# user only exists on cluster A
+apiA.users().create(body={"user": {"email": "case1@test", "is_active": True}}).execute()
+
+# case 2
+# user exists on cluster A and has remotes on B and C
+case2 = apiA.users().create(body={"user": {"email": "case2@test", "is_active": True}}).execute()
+newtok = apiA.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case2["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtok), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtok), insecure=True).users().current().execute()
+
+# case 3
+# user only exists on cluster B
+case3 = apiB.users().create(body={"user": {"email": "case3@test", "is_active": True}}).execute()
+
+# case 4
+# user only exists on cluster B and has remotes on A and C
+case4 = apiB.users().create(body={"user": {"email": "case4@test", "is_active": True}}).execute()
+newtok = apiB.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case4["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtok), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtok), insecure=True).users().current().execute()
+
+
+# case 5
+# user exists on both cluster A and B
+case5 = apiA.users().create(body={"user": {"email": "case5@test", "is_active": True}}).execute()
+case5 = apiB.users().create(body={"user": {"email": "case5@test", "is_active": True}}).execute()
+
+# case 6
+# user exists on both cluster A and B, with remotes on A, B and C
+case6_A = apiA.users().create(body={"user": {"email": "case6@test", "is_active": True}}).execute()
+newtokA = apiA.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case6_A["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokA), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokA), insecure=True).users().current().execute()
+
+case6_B = apiB.users().create(body={"user": {"email": "case6@test", "is_active": True}}).execute()
+newtokB = apiB.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case6_B["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
+
+# case 7
+# user exists on both cluster B and A, with remotes on A, B and C
+case7_B = apiB.users().create(body={"user": {"email": "case7@test", "is_active": True}}).execute()
+newtokB = apiB.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case7_B["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
+
+case7_A = apiA.users().create(body={"user": {"email": "case7@test", "is_active": True}}).execute()
+newtokA = apiA.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case7_A["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokA), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokA), insecure=True).users().current().execute()
+
+# case 8
+# user exists on both cluster B and C, with remotes on A, B and C
+case8_B = apiB.users().create(body={"user": {"email": "case8@test", "is_active": True}}).execute()
+newtokB = apiB.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case8_B["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
+
+case8_C = apiC.users().create(body={"user": {"email": "case8@test", "is_active": True}}).execute()
+newtokC = apiC.api_client_authorizations().create(body={
+    "api_client_authorization": {'owner_uuid': case8_C["uuid"]}}).execute()
+arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokC), insecure=True).users().current().execute()
+arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokC), insecure=True).users().current().execute()
+
+# case 9
+# user only exists on cluster B, but is inactive
+case9 = apiB.users().create(body={"user": {"email": "case9@test", "is_active": False}}).execute()
diff --git a/sdk/python/tests/fed-migrate/fed-migrate.cwl b/sdk/python/tests/fed-migrate/fed-migrate.cwl
new file mode 100644 (file)
index 0000000..1c8fcca
--- /dev/null
@@ -0,0 +1,576 @@
+#!/usr/bin/env cwl-runner
+$graph:
+  - class: Workflow
+    cwlVersion: v1.0
+    id: '#run_test'
+    inputs:
+      - id: arvados_api_hosts
+        type:
+          items: string
+          type: array
+      - id: superuser_tokens
+        type:
+          items: string
+          type: array
+      - default: arv-federation-migrate
+        id: fed_migrate
+        type: string
+    outputs:
+      - id: report3
+        outputSource: main_2/report3
+        type: File
+    requirements:
+      InlineJavascriptRequirement: {}
+      MultipleInputFeatureRequirement: {}
+      ScatterFeatureRequirement: {}
+      StepInputExpressionRequirement: {}
+      SubworkflowFeatureRequirement: {}
+    steps:
+      - id: main_1
+        in:
+          arvados_api_hosts:
+            source: arvados_api_hosts
+          create_users:
+            default:
+              class: File
+              location: create_users.py
+          superuser_tokens:
+            source: superuser_tokens
+        out:
+          - ar
+        run:
+          arguments:
+            - python
+            - $(inputs.create_users)
+            - _script
+          class: CommandLineTool
+          id: main_1_embed
+          inputs:
+            - id: arvados_api_hosts
+              type:
+                items: string
+                type: array
+            - id: superuser_tokens
+              type:
+                items: string
+                type: array
+            - id: create_users
+              type: File
+          outputs:
+            - id: ar
+              outputBinding:
+                outputEval: $(inputs.arvados_api_hosts)
+              type:
+                items: string
+                type: array
+          requirements:
+            InitialWorkDirRequirement:
+              listing:
+                - entry: |
+                    {
+                      "arvados_api_hosts": $(inputs.arvados_api_hosts),
+                      "superuser_tokens": $(inputs.superuser_tokens)
+                    }
+                  entryname: _script
+            InlineJavascriptRequirement: {}
+      - id: main_2
+        in:
+          ar:
+            source: main_1/ar
+          arvados_api_hosts:
+            source: arvados_api_hosts
+          fed_migrate:
+            source: fed_migrate
+          host:
+            valueFrom: '$(inputs.arvados_api_hosts[0])'
+          superuser_tokens:
+            source: superuser_tokens
+          token:
+            valueFrom: '$(inputs.superuser_tokens[0])'
+        out:
+          - report
+          - report2
+          - report3
+          - r
+        run:
+          class: Workflow
+          id: main_2_embed
+          inputs:
+            - id: ar
+              type:
+                items: string
+                type: array
+            - id: arvados_api_hosts
+              type:
+                items: string
+                type: array
+            - id: superuser_tokens
+              type:
+                items: string
+                type: array
+            - id: fed_migrate
+              type: string
+            - id: host
+              type: Any
+            - id: token
+              type: Any
+          outputs:
+            - id: report
+              outputSource: main_2_embed_1/report
+              type: File
+            - id: report2
+              outputSource: main_2_embed_2/report2
+              type: File
+            - id: report3
+              outputSource: main_2_embed_3/report3
+              type: File
+            - id: r
+              outputSource: main_2_embed_4/r
+              type: File
+          requirements:
+            - class: EnvVarRequirement
+              envDef:
+                ARVADOS_API_HOST: $(inputs.host)
+                ARVADOS_API_HOST_INSECURE: '1'
+                ARVADOS_API_TOKEN: $(inputs.token)
+          steps:
+            - id: main_2_embed_1
+              in:
+                fed_migrate:
+                  source: fed_migrate
+                host:
+                  source: host
+                token:
+                  source: token
+              out:
+                - report
+              run:
+                arguments:
+                  - $(inputs.fed_migrate)
+                  - '--report'
+                  - report.csv
+                class: CommandLineTool
+                id: main_2_embed_1_embed
+                inputs:
+                  - id: fed_migrate
+                    type: string
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                outputs:
+                  - id: report
+                    outputBinding:
+                      glob: report.csv
+                    type: File
+                requirements:
+                  InlineJavascriptRequirement: {}
+            - id: main_2_embed_2
+              in:
+                host:
+                  source: host
+                report:
+                  source: main_2_embed_1/report
+                token:
+                  source: token
+              out:
+                - report2
+              run:
+                arguments:
+                  - sed
+                  - '-E'
+                  - 's/,(case[1-8])2?,/,\1,/g'
+                class: CommandLineTool
+                id: main_2_embed_2_embed
+                inputs:
+                  - id: report
+                    type: File
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                outputs:
+                  - id: report2
+                    outputBinding:
+                      glob: report.csv
+                    type: File
+                requirements:
+                  InlineJavascriptRequirement: {}
+                stdin: $(inputs.report.path)
+                stdout: report.csv
+            - id: main_2_embed_3
+              in:
+                fed_migrate:
+                  source: fed_migrate
+                host:
+                  source: host
+                report2:
+                  source: main_2_embed_2/report2
+                token:
+                  source: token
+              out:
+                - report3
+              run:
+                arguments:
+                  - $(inputs.fed_migrate)
+                  - '--migrate'
+                  - $(inputs.report2)
+                class: CommandLineTool
+                id: main_2_embed_3_embed
+                inputs:
+                  - id: report2
+                    type: File
+                  - id: fed_migrate
+                    type: string
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                outputs:
+                  - id: report3
+                    outputBinding:
+                      outputEval: $(inputs.report2)
+                    type: File
+                requirements:
+                  InlineJavascriptRequirement: {}
+            - id: main_2_embed_4
+              in:
+                arvados_api_hosts:
+                  source: arvados_api_hosts
+                check:
+                  default:
+                    class: File
+                    location: check.py
+                host:
+                  source: host
+                report3:
+                  source: main_2_embed_3/report3
+                superuser_tokens:
+                  source: superuser_tokens
+                token:
+                  source: token
+              out:
+                - r
+              run:
+                arguments:
+                  - python
+                  - $(inputs.check)
+                  - _script
+                class: CommandLineTool
+                id: main_2_embed_4_embed
+                inputs:
+                  - id: report3
+                    type: File
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                  - id: arvados_api_hosts
+                    type:
+                      items: string
+                      type: array
+                  - id: superuser_tokens
+                    type:
+                      items: string
+                      type: array
+                  - id: check
+                    type: File
+                outputs:
+                  - id: r
+                    outputBinding:
+                      outputEval: $(inputs.report3)
+                    type: File
+                requirements:
+                  InitialWorkDirRequirement:
+                    listing:
+                      - entry: |
+                          {
+                            "arvados_api_hosts": $(inputs.arvados_api_hosts),
+                            "superuser_tokens": $(inputs.superuser_tokens)
+                          }
+                        entryname: _script
+                  InlineJavascriptRequirement: {}
+  - arguments:
+      - arvbox
+      - cat
+      - /var/lib/arvados/superuser_token
+    class: CommandLineTool
+    cwlVersion: v1.0
+    id: '#superuser_tok'
+    inputs:
+      - id: container
+        type: string
+    outputs:
+      - id: superuser_token
+        outputBinding:
+          glob: superuser_token.txt
+          loadContents: true
+          outputEval: '$(self[0].contents.trim())'
+        type: string
+    requirements:
+      EnvVarRequirement:
+        envDef:
+          ARVBOX_CONTAINER: $(inputs.container)
+      InlineJavascriptRequirement: {}
+    stdout: superuser_token.txt
+  - class: Workflow
+    id: '#main'
+    inputs:
+      - id: arvados_api_hosts
+        type:
+          items: string
+          type: array
+      - id: arvados_cluster_ids
+        type:
+          items: string
+          type: array
+      - id: superuser_tokens
+        type:
+          items: string
+          type: array
+      - id: arvbox_containers
+        type:
+          items: string
+          type: array
+      - default: arv-federation-migrate
+        id: fed_migrate
+        type: string
+      - id: arvbox_bin
+        type: File
+      - default: 15531-logincluster-migrate
+        id: refspec
+        type: string
+    outputs:
+      - id: supertok
+        outputSource: main_2/supertok
+        type:
+          items: string
+          type: array
+      - id: report
+        outputSource: run_test_3/report3
+        type: File
+    requirements:
+      InlineJavascriptRequirement: {}
+      MultipleInputFeatureRequirement: {}
+      ScatterFeatureRequirement: {}
+      StepInputExpressionRequirement: {}
+      SubworkflowFeatureRequirement: {}
+    steps:
+      - id: main_1
+        in:
+          arvados_cluster_ids:
+            source: arvados_cluster_ids
+        out:
+          - logincluster
+        run:
+          class: ExpressionTool
+          expression: '${return {''logincluster'': (inputs.arvados_cluster_ids[0])};}'
+          inputs:
+            - id: arvados_cluster_ids
+              type:
+                items: string
+                type: array
+          outputs:
+            - id: logincluster
+              type: string
+      - id: main_2
+        in:
+          arvbox_bin:
+            source: arvbox_bin
+          cluster_id:
+            source: arvados_cluster_ids
+          container:
+            source: arvbox_containers
+          host:
+            source: arvados_api_hosts
+          logincluster:
+            source: main_1/logincluster
+          refspec:
+            source: refspec
+        out:
+          - supertok
+        run:
+          class: Workflow
+          id: main_2_embed
+          inputs:
+            - id: container
+              type: string
+            - id: cluster_id
+              type: string
+            - id: host
+              type: string
+            - id: logincluster
+              type: string
+            - id: arvbox_bin
+              type: File
+            - id: refspec
+              type: string
+          outputs:
+            - id: supertok
+              outputSource: superuser_tok_3/superuser_token
+              type: string
+          requirements:
+            - class: EnvVarRequirement
+              envDef:
+                ARVBOX_CONTAINER: $(inputs.container)
+          steps:
+            - id: main_2_embed_1
+              in:
+                cluster_id:
+                  source: cluster_id
+                container:
+                  source: container
+                logincluster:
+                  source: logincluster
+                set_login:
+                  default:
+                    class: File
+                    location: set_login.py
+              out:
+                - c
+              run:
+                arguments:
+                  - sh
+                  - _script
+                class: CommandLineTool
+                id: main_2_embed_1_embed
+                inputs:
+                  - id: container
+                    type: string
+                  - id: cluster_id
+                    type: string
+                  - id: logincluster
+                    type: string
+                  - id: set_login
+                    type: File
+                outputs:
+                  - id: c
+                    outputBinding:
+                      outputEval: $(inputs.container)
+                    type: string
+                requirements:
+                  InitialWorkDirRequirement:
+                    listing:
+                      - entry: >
+                          set -x
+
+                          docker cp
+                          $(inputs.container):/var/lib/arvados/cluster_config.yml.override
+                          .
+
+                          chmod +w cluster_config.yml.override
+
+                          python $(inputs.set_login.path)
+                          cluster_config.yml.override $(inputs.cluster_id)
+                          $(inputs.logincluster)
+
+                          docker cp cluster_config.yml.override
+                          $(inputs.container):/var/lib/arvados
+                        entryname: _script
+                  InlineJavascriptRequirement: {}
+            - id: main_2_embed_2
+              in:
+                arvbox_bin:
+                  source: arvbox_bin
+                c:
+                  source: main_2_embed_1/c
+                container:
+                  source: container
+                host:
+                  source: host
+                refspec:
+                  source: refspec
+              out:
+                - d
+              run:
+                arguments:
+                  - sh
+                  - _script
+                class: CommandLineTool
+                id: main_2_embed_2_embed
+                inputs:
+                  - id: container
+                    type: string
+                  - id: host
+                    type: string
+                  - id: arvbox_bin
+                    type: File
+                  - id: c
+                    type: string
+                  - id: refspec
+                    type: string
+                outputs:
+                  - id: d
+                    outputBinding:
+                      outputEval: $(inputs.c)
+                    type: string
+                requirements:
+                  InitialWorkDirRequirement:
+                    listing:
+                      - entry: >
+                          set -xe
+
+                          $(inputs.arvbox_bin.path) pipe <<EOF
+
+                          cd /usr/src/arvados
+
+                          git fetch
+
+                          git checkout -f $(inputs.refspec)
+
+                          git pull
+
+                          chown arvbox:arvbox -R .
+
+                          EOF
+
+
+                          $(inputs.arvbox_bin.path) hotreset
+
+
+                          while ! curl --fail --insecure --silent
+                          https://$(inputs.host)/discovery/v1/apis/arvados/v1/rest
+                          >/dev/null ; do sleep 3 ; done
+
+                          export ARVADOS_API_HOST=$(inputs.host)
+
+                          export ARVADOS_API_TOKEN=\$($(inputs.arvbox_bin.path)
+                          cat /var/lib/arvados/superuser_token)
+
+                          export ARVADOS_API_HOST_INSECURE=1
+
+                          ARVADOS_VIRTUAL_MACHINE_UUID=\$($(inputs.arvbox_bin.path)
+                          cat /var/lib/arvados/vm-uuid)
+
+                          while ! python -c "import arvados ;
+                          arvados.api().virtual_machines().get(uuid='$ARVADOS_VIRTUAL_MACHINE_UUID').execute()"
+                          2>/dev/null ; do sleep 3; done
+                        entryname: _script
+                  InlineJavascriptRequirement: {}
+            - id: superuser_tok_3
+              in:
+                container:
+                  source: container
+                d:
+                  source: main_2_embed_2/d
+              out:
+                - superuser_token
+              run: '#superuser_tok'
+        scatter:
+          - container
+          - cluster_id
+          - host
+        scatterMethod: dotproduct
+      - id: run_test_3
+        in:
+          arvados_api_hosts:
+            source: arvados_api_hosts
+          fed_migrate:
+            source: fed_migrate
+          superuser_tokens:
+            source: main_2/supertok
+        out:
+          - report3
+        run: '#run_test'
+cwlVersion: v1.0
+
diff --git a/sdk/python/tests/fed-migrate/fed-migrate.cwlex b/sdk/python/tests/fed-migrate/fed-migrate.cwlex
new file mode 100644 (file)
index 0000000..22bc95a
--- /dev/null
@@ -0,0 +1,67 @@
+import "run-test.cwlex" as run_test
+import "superuser-tok.cwl" as superuser_tok
+
+def workflow main(
+  arvados_api_hosts string[],
+  arvados_cluster_ids string[],
+  superuser_tokens string[],
+  arvbox_containers string[],
+  fed_migrate="arv-federation-migrate",
+  arvbox_bin File,
+  refspec="15531-logincluster-migrate"
+) {
+
+  logincluster = run expr (arvados_cluster_ids) string (inputs.arvados_cluster_ids[0])
+
+  scatter arvbox_containers as container,
+          arvados_cluster_ids as cluster_id,
+         arvados_api_hosts as host
+    do run workflow(logincluster, arvbox_bin, refspec)
+  {
+    requirements {
+      EnvVarRequirement {
+        envDef: {
+          ARVBOX_CONTAINER: "$(inputs.container)"
+        }
+      }
+    }
+
+    run tool(container, cluster_id, logincluster, set_login = File("set_login.py")) {
+sh <<<
+set -x
+docker cp $(inputs.container):/var/lib/arvados/cluster_config.yml.override .
+chmod +w cluster_config.yml.override
+python $(inputs.set_login.path) cluster_config.yml.override $(inputs.cluster_id) $(inputs.logincluster)
+docker cp cluster_config.yml.override $(inputs.container):/var/lib/arvados
+>>>
+      return container as c
+    }
+    run tool(container, host, arvbox_bin, c, refspec) {
+sh <<<
+set -xe
+$(inputs.arvbox_bin.path) pipe <<EOF
+cd /usr/src/arvados
+git fetch
+git checkout -f $(inputs.refspec)
+git pull
+chown arvbox:arvbox -R .
+EOF
+
+$(inputs.arvbox_bin.path) hotreset
+
+while ! curl --fail --insecure --silent https://$(inputs.host)/discovery/v1/apis/arvados/v1/rest >/dev/null ; do sleep 3 ; done
+export ARVADOS_API_HOST=$(inputs.host)
+export ARVADOS_API_TOKEN=\$($(inputs.arvbox_bin.path) cat /var/lib/arvados/superuser_token)
+export ARVADOS_API_HOST_INSECURE=1
+ARVADOS_VIRTUAL_MACHINE_UUID=\$($(inputs.arvbox_bin.path) cat /var/lib/arvados/vm-uuid)
+while ! python -c "import arvados ; arvados.api().virtual_machines().get(uuid='$ARVADOS_VIRTUAL_MACHINE_UUID').execute()" 2>/dev/null ; do sleep 3; done
+>>>
+      return c as d
+    }
+    supertok = superuser_tok(container, d)
+    return supertok
+  }
+
+  report = run_test(arvados_api_hosts, superuser_tokens=supertok, fed_migrate)
+  return supertok, report
+}
\ No newline at end of file
diff --git a/sdk/python/tests/fed-migrate/run-test.cwlex b/sdk/python/tests/fed-migrate/run-test.cwlex
new file mode 100644 (file)
index 0000000..55ac6a7
--- /dev/null
@@ -0,0 +1,58 @@
+def workflow main(
+  arvados_api_hosts string[],
+  superuser_tokens string[],
+  fed_migrate="arv-federation-migrate"
+) {
+
+  run tool(arvados_api_hosts, superuser_tokens, create_users=File("create_users.py")) {
+    python $(inputs.create_users) <<<
+{
+  "arvados_api_hosts": $(inputs.arvados_api_hosts),
+  "superuser_tokens": $(inputs.superuser_tokens)
+}
+>>>
+    return arvados_api_hosts as ar
+  }
+
+  run workflow(ar, arvados_api_hosts, superuser_tokens,
+               fed_migrate,
+              host=$(inputs.arvados_api_hosts[0]),
+              token=$(inputs.superuser_tokens[0])) {
+    requirements {
+      EnvVarRequirement {
+        envDef: {
+          ARVADOS_API_HOST: "$(inputs.host)",
+          ARVADOS_API_TOKEN: "$(inputs.token)",
+         ARVADOS_API_HOST_INSECURE: "1"
+       }
+      }
+    }
+
+    run tool(fed_migrate, host, token) {
+      $(inputs.fed_migrate) --report report.csv
+      return File("report.csv") as report
+    }
+
+    run tool(report, host, token) {
+      sed -E 's/,(case[1-8])2?,/,\\1,/g' < $(inputs.report.path) > report.csv
+      return File("report.csv") as report2
+    }
+
+    run tool(report2, fed_migrate, host, token) {
+      $(inputs.fed_migrate) --migrate $(inputs.report2)
+      return report2 as report3
+    }
+
+    run tool(report3, host, token, arvados_api_hosts, superuser_tokens, check=File("check.py")) {
+      python $(inputs.check) <<<
+{
+  "arvados_api_hosts": $(inputs.arvados_api_hosts),
+  "superuser_tokens": $(inputs.superuser_tokens)
+}
+>>>
+    return report3 as r
+    }
+  }
+
+  return report3
+}
\ No newline at end of file
diff --git a/sdk/python/tests/fed-migrate/set_login.py b/sdk/python/tests/fed-migrate/set_login.py
new file mode 100644 (file)
index 0000000..2900af1
--- /dev/null
@@ -0,0 +1,10 @@
+import json
+import sys
+
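+# Usage: set_login.py <cluster config file> <cluster id> <login cluster id>
+# Rewrite the config in place so the given cluster delegates logins to the
+# federation's login cluster, and mark all remote clusters insecure (arvbox
+# uses self-signed certificates).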
+f = open(sys.argv[1], "r+")
+j = json.load(f)
+j["Clusters"][sys.argv[2]]["Login"] = {"LoginCluster": sys.argv[3]}
+for r in j["Clusters"][sys.argv[2]]["RemoteClusters"]:
+    j["Clusters"][sys.argv[2]]["RemoteClusters"][r]["Insecure"] = True
+f.seek(0)
+json.dump(j, f)
diff --git a/sdk/python/tests/fed-migrate/superuser-tok.cwl b/sdk/python/tests/fed-migrate/superuser-tok.cwl
new file mode 100755 (executable)
index 0000000..d2ce253
--- /dev/null
@@ -0,0 +1,19 @@
+#!/usr/bin/env cwltool
+cwlVersion: v1.0
+class: CommandLineTool
+stdout: superuser_token.txt
+inputs:
+  container: string
+outputs:
+  superuser_token:
+    type: string
+    outputBinding:
+      glob: superuser_token.txt
+      loadContents: true
+      outputEval: $(self[0].contents.trim())
+requirements:
+  EnvVarRequirement:
+    envDef:
+      ARVBOX_CONTAINER: "$(inputs.container)"
+  InlineJavascriptRequirement: {}
+arguments: [arvbox, cat, /var/lib/arvados/superuser_token]
index a760255dd6da8e01470dacafa6bcfafeddc50058..086fa542a2b28b2112f92eb49e21934247cf2710 100644 (file)
@@ -865,7 +865,7 @@ class BlockManagerTest(unittest.TestCase):
     def test_bufferblock_commit_pending(self):
         # Test for bug #7225
         mockkeep = mock.MagicMock()
-        mockkeep.put.side_effect = lambda x: time.sleep(1)
+        mockkeep.put.side_effect = lambda *args, **kwargs: time.sleep(1)
         with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
             bufferblock = blockmanager.alloc_bufferblock()
             bufferblock.append("foo")
index 80e6987b38bbaa7068db6a4978ef0c85a579bf96..68158d760ee785a501d75e931ac5635109f32c13 100644 (file)
@@ -424,15 +424,16 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def check_errors_from_last_retry(self, verb, exc_class):
         api_client = self.mock_keep_services(count=2)
         req_mock = tutil.mock_keep_responses(
-            "retry error reporting test", 500, 500, 403, 403)
+            "retry error reporting test", 500, 500, 500, 500, 500, 500, 502, 502)
         with req_mock, tutil.skip_sleep, \
                 self.assertRaises(exc_class) as err_check:
             keep_client = arvados.KeepClient(api_client=api_client)
             getattr(keep_client, verb)('d41d8cd98f00b204e9800998ecf8427e+0',
                                        num_retries=3)
-        self.assertEqual([403, 403], [
+        self.assertEqual([502, 502], [
                 getattr(error, 'status_code', None)
                 for error in err_check.exception.request_errors().values()])
+        self.assertRegex(str(err_check.exception), r'failed to (read|write) .* after 4 attempts')
 
     def test_get_error_reflects_last_retry(self):
         self.check_errors_from_last_retry('get', arvados.errors.KeepReadError)
@@ -1031,7 +1032,9 @@ class KeepClientRetryTestMixin(object):
     def check_exception(self, error_class=None, *args, **kwargs):
         if error_class is None:
             error_class = self.DEFAULT_EXCEPTION
-        self.assertRaises(error_class, self.run_method, *args, **kwargs)
+        with self.assertRaises(error_class) as err:
+            self.run_method(*args, **kwargs)
+        return err
 
     def test_immediate_success(self):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 200):
@@ -1055,7 +1058,8 @@ class KeepClientRetryTestMixin(object):
 
     def test_error_after_retries_exhausted(self):
         with self.TEST_PATCHER(self.DEFAULT_EXPECT, 500, 500, 200):
-            self.check_exception(num_retries=1)
+            err = self.check_exception(num_retries=1)
+        self.assertRegex(str(err.exception), r'failed to .* after 2 attempts')
 
     def test_num_retries_instance_fallback(self):
         self.client_kwargs['num_retries'] = 3
index f236ce83a30a47e51c8e8499abb7b5aecfe29c56..e29deba6c9de314f1deb9c14fd6e723c8e085ac9 100644 (file)
@@ -207,7 +207,7 @@ module Arv
         loop do
           ii = (lo + hi) / 2
           range = @ranges[ii]
-          if range.include?(target)
+          if range.include?(target) && (target < range.end || ii == hi-1)
             return ii
           elsif ii == lo
             raise RangeError.new("%i not in segment" % target)
@@ -481,14 +481,13 @@ module Arv
 
       def initialize(name)
         @name = name
-        @loc_ranges = {}
+        @loc_ranges = []
         @loc_range_start = 0
         @file_specs = []
       end
 
       def add_file(coll_file)
         coll_file.each_segment do |segment|
-          extend_locator_ranges(segment.locators)
           extend_file_specs(coll_file.name, segment)
         end
       end
@@ -498,48 +497,51 @@ module Arv
           ""
         else
           "%s %s %s\n" % [escape_name(@name),
-                          @loc_ranges.keys.join(" "),
+                          @loc_ranges.collect(&:locator).join(" "),
                           @file_specs.join(" ")]
         end
       end
 
       private
 
-      def extend_locator_ranges(locators)
-        locators.
-            select { |loc_s| not @loc_ranges.include?(loc_s) }.
-            each do |loc_s|
-          @loc_ranges[loc_s] = LocatorRange.new(loc_s, @loc_range_start)
-          @loc_range_start = @loc_ranges[loc_s].end
+      def extend_file_specs(filename, segment)
+        found_overlap = false
+        # Find the longest prefix of segment.locators that's a suffix
+        # of the existing @loc_ranges. If we find one, drop those
+        # locators (they'll be added back below, when we're handling
+        # the normal/no-overlap case).
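+        # For example, if @loc_ranges ends with [A, B] and segment.locators is
+        # [B, C], B is dropped here and re-added below, yielding [A, B, C]
+        # rather than [A, B, B, C].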
+        (1..segment.locators.length).each do |overlap|
+          if @loc_ranges.length >= overlap && @loc_ranges[-overlap..-1].collect(&:locator) == segment.locators[0..overlap-1]
+            (1..overlap).each do
+              discarded = @loc_ranges.pop
+              @loc_range_start -= (discarded.end - discarded.begin)
+            end
+            found_overlap = true
+            break
+          end
         end
-      end
 
-      def extend_file_specs(filename, segment)
-        # Given a filename and a LocatorSegment, add the smallest
-        # possible array of file spec strings to @file_specs that
-        # builds the file from available locators.
-        filename = escape_name(filename)
-        start_pos = segment.start_pos
-        length = segment.length
-        start_loc = segment.locators.first
-        prev_loc = start_loc
-        # Build a list of file specs by iterating through the segment's
-        # locators and preparing a file spec for each contiguous range.
-        segment.locators[1..-1].each do |loc_s|
-          range = @loc_ranges[loc_s]
-          if range.begin != @loc_ranges[prev_loc].end
-            range_start, range_length =
-              start_and_length_at(start_loc, prev_loc, start_pos, length)
-            @file_specs << "#{range_start}:#{range_length}:#{filename}"
-            start_pos = 0
-            length -= range_length
-            start_loc = loc_s
+        # If there was no overlap at the end of our existing
+        # @loc_ranges, check whether the full set of segment.locators
+        # appears earlier in @loc_ranges. If so, use those instead of
+        # appending the same locators again.
+        if !found_overlap && segment.locators.length < @loc_ranges.length
+          segment_start = 0
+          (0..@loc_ranges.length-1).each do |ri|
+            if @loc_ranges[ri..ri+segment.locators.length-1].collect(&:locator) == segment.locators
+              @file_specs << "#{segment.start_pos + @loc_ranges[ri].begin}:#{segment.length}:#{escape_name(filename)}"
+              return
+            end
           end
-          prev_loc = loc_s
         end
-        range_start, range_length =
-          start_and_length_at(start_loc, prev_loc, start_pos, length)
-        @file_specs << "#{range_start}:#{range_length}:#{filename}"
+
+        segment_start = @loc_range_start
+        segment.locators.each do |loc_s|
+          r = LocatorRange.new(loc_s, @loc_range_start)
+          @loc_ranges << r
+          @loc_range_start = r.end
+        end
+        @file_specs << "#{segment.start_pos + segment_start}:#{segment.length}:#{escape_name(filename)}"
       end
 
       def escape_name(name)
@@ -547,12 +549,6 @@ module Arv
           s.each_byte.map { |c| "\\%03o" % c }.join("")
         end
       end
-
-      def start_and_length_at(start_key, end_key, start_pos, length)
-        range_begin = @loc_ranges[start_key].begin + start_pos
-        range_length = [@loc_ranges[end_key].end - range_begin, length].min
-        [range_begin, range_length]
-      end
     end
   end
 end
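
The overlap handling above reduces to a list comparison: for a candidate
overlap length n, the last n locators already recorded in @loc_ranges must
equal the first n locators of the incoming segment. A minimal standalone
sketch of that check, using plain strings instead of the SDK's LocatorRange
objects (illustrative names only, not the SDK's real classes):

    # Return the smallest n such that the last n existing locators equal the
    # first n new locators (the same order the loop above tests them in), or
    # nil if there is no overlap.
    def first_matching_overlap(existing_locators, new_locators)
      (1..new_locators.length).each do |n|
        next if existing_locators.length < n
        return n if existing_locators[-n..-1] == new_locators[0, n]
      end
      nil
    end

    first_matching_overlap(%w[aaa+9 bbb+9 ccc+9], %w[ccc+9 ddd+9])  # => 1
    first_matching_overlap(%w[aaa+9 bbb+9 ccc+9], %w[ddd+9 eee+9])  # => nil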
index 288fd263fa8bdbe69cff943446dd2c0f3db9bc04..8b747c365211b742d303a2d2894129253d830d37 100644 (file)
@@ -15,6 +15,10 @@ class CollectionTest < Minitest::Test
      "./s1 #{TWO_BY_TWO_BLOCKS.last} 0:5:f1 5:4:f3\n"]
   TWO_BY_TWO_MANIFEST_S = TWO_BY_TWO_MANIFEST_A.join("")
 
+  def abcde_blocks
+    ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+9", "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb+9", "cccccccccccccccccccccccccccccccc+9", "dddddddddddddddddddddddddddddddd+9", "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee+9"]
+  end
+
   ### .new
 
   def test_empty_construction
@@ -32,6 +36,29 @@ class CollectionTest < Minitest::Test
     end
   end
 
+  def test_range_edge_cases
+    [
+      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1\n",
+      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n",
+      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file1\n",
+      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2 0:0:file1\n",
+      ". 0cc175b9c0f1b6a831c399e269772661+1 0:0:file1 1:0:file2 1:0:file1\n",
+    ].each do |txt|
+      coll = Arv::Collection.new(txt)
+      coll.normalize
+      assert_match(/ 0:0:file1/, coll.manifest_text)
+    end
+    [
+      ". d41d8cd98f00b204e9800998ecf8427e+0 1:0:file1\n",
+      ". 0cc175b9c0f1b6a831c399e269772661+1 0:0:file1 2:0:file2 1:0:file1\n",
+    ].each do |txt|
+      assert_raises(RangeError) do
+        coll = Arv::Collection.new(txt)
+        coll.normalize
+      end
+    end
+  end
+
   def test_non_manifest_construction_error
     ["word", ". abc def", ". #{random_block} 0:", ". / !"].each do |m_text|
       assert_raises(ArgumentError,
@@ -145,12 +172,12 @@ class CollectionTest < Minitest::Test
     test_normalization_file_spans_two_whole_blocks("2:3:f1 2:3:f1", 1)
   end
 
-  def test_normalization_dedups_locators
+  def test_normalization_handles_duplicate_locator
     blocks = random_blocks(2, 5)
     coll = Arv::Collection.new(". %s %s 1:8:f1 11:8:f1\n" %
                                [blocks.join(" "), blocks.reverse.join(" ")])
     coll.normalize
-    assert_equal(". #{blocks.join(' ')} 1:8:f1 6:4:f1 0:4:f1\n",
+    assert_equal(". #{blocks.join(' ')} #{blocks[0]} 1:8:f1 6:8:f1\n",
                  coll.manifest_text)
   end
 
@@ -395,6 +422,24 @@ class CollectionTest < Minitest::Test
                  dst_coll.manifest_text)
   end
 
+  def test_copy_with_repeated_blocks
+    blocks = abcde_blocks
+    src_coll = Arv::Collection.new(". #{blocks[0]} #{blocks[1]} #{blocks[2]} #{blocks[0]} #{blocks[1]} #{blocks[2]} #{blocks[3]} #{blocks[4]} 27:27:f1\n")
+    dst_coll = Arv::Collection.new()
+    dst_coll.cp_r("f1", "./", src_coll)
+    assert_equal(". #{blocks[0]} #{blocks[1]} #{blocks[2]} 0:27:f1\n", dst_coll.manifest_text, "mangled by cp_r")
+  end
+
+  def test_copy_with_repeated_split_blocks
+    blocks = abcde_blocks
+    src_coll = Arv::Collection.new(". #{blocks[0]} #{blocks[1]} #{blocks[2]} #{blocks[0]} #{blocks[1]} #{blocks[2]} #{blocks[3]} #{blocks[4]} 20:27:f1\n")
+    dst_coll = Arv::Collection.new()
+    src_coll.normalize
+    assert_equal(". #{blocks[2]} #{blocks[0]} #{blocks[1]} #{blocks[2]} 2:27:f1\n", src_coll.manifest_text, "mangled by normalize()")
+    dst_coll.cp_r("f1", "./", src_coll)
+    assert_equal(". #{blocks[2]} #{blocks[0]} #{blocks[1]} #{blocks[2]} 2:27:f1\n", dst_coll.manifest_text, "mangled by cp_r")
+  end
+
   def test_copy_empty_source_path_raises_ArgumentError(src="", dst="./s1")
     coll = Arv::Collection.new(SIMPLEST_MANIFEST)
     assert_raises(ArgumentError) do
index 804d2a479d3d4701489c8d1bed812c7a6873c252..f166505bc07ea82703fb8673c919119003cc20c2 100644 (file)
@@ -25,9 +25,6 @@ group :test, :development do
   gem 'byebug'
 end
 
-# We need this dependency because of crunchv1
-gem 'arvados-cli'
-
 gem 'pg', '~> 1.0'
 
 gem 'multi_json'
@@ -58,7 +55,12 @@ gem 'faye-websocket'
 
 gem 'themes_for_rails', git: 'https://github.com/curoverse/themes_for_rails'
 
-gem 'arvados', '>= 1.3.1.20190301212059'
+# We need arvados-cli because of crunchv1. Note: bundler can't handle
+# two gems with the same "git" url but different "glob" values, hence
+# the use of a wildcard here instead of literal paths
+# (sdk/cli/arvados-cli.gemspec and sdk/ruby/arvados.gemspec).
+gem 'arvados-cli', git: 'https://github.com/curoverse/arvados.git', glob: 'sdk/*/*.gemspec'
+gem 'arvados', git: 'https://github.com/curoverse/arvados.git', glob: 'sdk/*/*.gemspec'
 gem 'httpclient'
 
 gem 'sshkey'
@@ -73,6 +75,10 @@ gem 'rails-controller-testing'
 
 gem 'sass-rails'
 
+# arvados-google-api-client and googleauth depend on signet, but
+# signet 0.12 is incompatible with ruby 2.3.
+gem 'signet', '< 0.12'
+
 # Install any plugin gems
 Dir.glob(File.join(File.dirname(__FILE__), 'lib', '**', "Gemfile")) do |f|
     eval(IO.read(f), binding)
index b5ac62c9fb397dfe2d429ab365bce8ea7b5c1aba..5ebdff0ca725e5f05aab1b496bec21165427666a 100644 (file)
@@ -1,3 +1,27 @@
+GIT
+  remote: https://github.com/curoverse/arvados.git
+  revision: dd9f2403f43bcb93da5908ddde57d8c0491bb4c2
+  glob: sdk/*/*.gemspec
+  specs:
+    arvados (1.4.1.20191019025325)
+      activesupport (>= 3)
+      andand (~> 1.3, >= 1.3.3)
+      arvados-google-api-client (>= 0.7, < 0.8.9)
+      faraday (< 0.16)
+      i18n (~> 0)
+      json (>= 1.7.7, < 3)
+      jwt (>= 0.1.5, < 2)
+    arvados-cli (1.4.1.20191017145711)
+      activesupport (>= 3.2.13, < 5.1)
+      andand (~> 1.3, >= 1.3.3)
+      arvados (>= 1.4.1.20190320201707)
+      arvados-google-api-client (~> 0.6, >= 0.6.3, < 0.8.9)
+      curb (~> 0.8)
+      faraday (< 0.16)
+      json (>= 1.7.7, < 3)
+      oj (~> 3.0)
+      optimist (~> 3.0)
+
 GIT
   remote: https://github.com/curoverse/themes_for_rails
   revision: ddf6e592b3b6493ea0c2de7b5d3faa120ed35be0
@@ -49,27 +73,11 @@ GEM
       activemodel (>= 3.0.0)
       activesupport (>= 3.0.0)
       rack (>= 1.1.0)
-    addressable (2.6.0)
-      public_suffix (>= 2.0.2, < 4.0)
+    addressable (2.7.0)
+      public_suffix (>= 2.0.2, < 5.0)
     andand (1.3.3)
     arel (7.1.4)
-    arvados (1.3.1.20190320201707)
-      activesupport (>= 3)
-      andand (~> 1.3, >= 1.3.3)
-      arvados-google-api-client (>= 0.7, < 0.8.9)
-      i18n (~> 0)
-      json (>= 1.7.7, < 3)
-      jwt (>= 0.1.5, < 2)
-    arvados-cli (1.3.1.20190320201707)
-      activesupport (>= 3.2.13, < 5.1)
-      andand (~> 1.3, >= 1.3.3)
-      arvados (~> 1.3.0, >= 1.3.0)
-      arvados-google-api-client (~> 0.6, >= 0.6.3, < 0.8.9)
-      curb (~> 0.8)
-      json (>= 1.7.7, < 3)
-      oj (~> 3.0)
-      optimist (~> 3.0)
-    arvados-google-api-client (0.8.7.2)
+    arvados-google-api-client (0.8.7.3)
       activesupport (>= 3.2, < 5.1)
       addressable (~> 2.3)
       autoparse (~> 0.3)
@@ -94,7 +102,7 @@ GEM
       net-ssh-gateway (>= 1.1.0)
     concurrent-ruby (1.1.5)
     crass (1.0.4)
-    curb (0.9.9)
+    curb (0.9.10)
     database_cleaner (1.7.0)
     erubis (2.7.0)
     eventmachine (1.2.7)
@@ -113,7 +121,7 @@ GEM
     ffi (1.9.25)
     globalid (0.4.2)
       activesupport (>= 4.2.0)
-    googleauth (0.8.0)
+    googleauth (0.9.0)
       faraday (~> 0.12)
       jwt (>= 1.4, < 3.0)
       memoist (~> 0.16)
@@ -153,9 +161,9 @@ GEM
     minitest (5.10.3)
     mocha (1.8.0)
       metaclass (~> 0.0.1)
-    multi_json (1.13.1)
+    multi_json (1.14.1)
     multi_xml (0.6.0)
-    multipart-post (2.0.0)
+    multipart-post (2.1.1)
     net-scp (2.0.0)
       net-ssh (>= 2.6.5, < 6.0.0)
     net-sftp (2.1.2)
@@ -172,7 +180,7 @@ GEM
       multi_json (~> 1.3)
       multi_xml (~> 0.5)
       rack (>= 1.2, < 3)
-    oj (3.7.11)
+    oj (3.9.2)
     omniauth (1.4.3)
       hashie (>= 1.2, < 4)
       rack (>= 1.6.2, < 3)
@@ -180,13 +188,13 @@ GEM
       oauth2 (~> 1.1)
       omniauth (~> 1.2)
     optimist (3.0.0)
-    os (1.0.0)
+    os (1.0.1)
     passenger (6.0.2)
       rack
       rake (>= 0.8.1)
     pg (1.1.4)
     power_assert (1.1.4)
-    public_suffix (3.0.3)
+    public_suffix (4.0.1)
     rack (2.0.7)
     rack-test (0.6.3)
       rack (>= 1.0)
@@ -288,8 +296,8 @@ PLATFORMS
 DEPENDENCIES
   acts_as_api
   andand
-  arvados (>= 1.3.1.20190301212059)
-  arvados-cli
+  arvados!
+  arvados-cli!
   byebug
   database_cleaner
   factory_bot_rails
@@ -316,6 +324,7 @@ DEPENDENCIES
   rvm-capistrano
   safe_yaml
   sass-rails
+  signet (< 0.12)
   simplecov (~> 0.7.1)
   simplecov-rcov
   sshkey
index 14abfae039a6ead9014206a9705ecdc36875247a..aee5d1f9516c038a45fe37f19615eca77e52d44b 100644 (file)
@@ -33,7 +33,10 @@ class Arvados::V1::SchemaController < ApplicationController
         id: "arvados:v1",
         name: "arvados",
         version: "v1",
-        revision: "20131114",
+        # Format is YYYYMMDD, must be fixed-width (so it stays linearly
+        # sortable), and is updated manually. Clients may use it to
+        # determine availability of API server features.
+        revision: "20190926",
         source_version: AppVersion.hash,
         sourceVersion: AppVersion.hash, # source_version should be deprecated in the future
         packageVersion: AppVersion.package_version,
@@ -401,6 +404,28 @@ class Arvados::V1::SchemaController < ApplicationController
           end
         end
       end
+
+      discovery[:resources]['configs'] = {
+        methods: {
+          get: {
+            id: "arvados.configs.get",
+            path: "config",
+            httpMethod: "GET",
+            description: "Get public config",
+            parameters: {
+            },
+            parameterOrder: [
+            ],
+            response: {
+            },
+            scopes: [
+              "https://api.curoverse.com/auth/arvados",
+              "https://api.curoverse.com/auth/arvados.readonly"
+            ]
+          },
+        }
+      }
+
       Rails.configuration.API.DisabledAPIs.each do |method, _|
         ctrl, action = method.to_s.split('.', 2)
         discovery[:resources][ctrl][:methods].delete(action.to_sym)
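
Because the revision string is a fixed-width YYYYMMDD date, clients can gate
on newer server features (such as the config export added here) with a plain
string comparison once they have fetched the discovery document. A hedged
sketch, assuming the document has already been parsed into a Hash:

    # "20190926" is the discovery revision that ships the arvados.configs.get
    # endpoint; YYYYMMDD strings sort lexically, so >= needs no date parsing.
    def server_exports_config?(discovery_doc)
      discovery_doc["revision"].to_s >= "20190926"
    end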
index 4a345f363be8da15055f52d54dcfb929f6687298..2889eacee644ba080439faa6a0e17ad629c8171c 100644 (file)
@@ -176,17 +176,10 @@ class Arvados::V1::UsersController < ApplicationController
       return send_error("cannot move objects into supplied new_owner_uuid: new user does not have write permission", status: 403)
     end
 
-    redirect = params[:redirect_to_new_user]
-    if @object.uuid[0..4] != Rails.configuration.ClusterID && redirect
-      return send_error("cannot merge remote user to other with redirect_to_new_user=true", status: 422)
-    end
-
-    if !redirect
-      return send_error("merge with redirect_to_new_user=false is not yet supported", status: 422)
-    end
-
     act_as_system_user do
-      @object.merge(new_owner_uuid: params[:new_owner_uuid], redirect_to_user_uuid: redirect && new_user.uuid)
+      @object.merge(new_owner_uuid: params[:new_owner_uuid],
+                    new_user_uuid: new_user.uuid,
+                    redirect_to_new_user: params[:redirect_to_new_user])
     end
     show
   end
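
For reference, a hedged sketch of driving the merge endpoint as an admin with
the parameters exercised by the functional test further below; the host,
token, and UUIDs are placeholders, not real values:

    require "net/http"
    require "uri"

    uri = URI("https://arvados.example/arvados/v1/users/merge")
    req = Net::HTTP::Post.new(uri)
    req["Authorization"] = "OAuth2 #{ENV["ARVADOS_API_TOKEN"]}"
    req.set_form_data(
      "old_user_uuid" => "zbbbb-tpzed-remotremotremot",  # user being merged away
      "new_user_uuid" => "zzzzz-tpzed-000000000000000",  # user taking over
      "new_owner_uuid" => "zzzzz-tpzed-000000000000000", # new home for owned items
      "redirect_to_new_user" => "true",
    )
    res = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
      http.request(req)
    end
    puts res.code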
index b1c02b2a76a1597f7caf0837cf609b2deffa1eac..fdb83a03751b1b81e45b0e9cd71ec910a81f7cca 100644 (file)
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 module CommitsHelper
+  extend CurrentApiClient
 
   class GitError < RequestError
     def http_status
index 55db16a4b5e3e81fe407263d0dda69cb1dce9c35..e84a3d218779cd4872c3a2a06a0f610a2457d9ec 100644 (file)
@@ -92,9 +92,11 @@ class ApiClientAuthorization < ArvadosModel
        uuid_prefix+".arvadosapi.com")
   end
 
-  def self.make_http_client
+  def self.make_http_client(uuid_prefix:)
     clnt = HTTPClient.new
-    if Rails.configuration.TLS.Insecure
+
+    if uuid_prefix && (Rails.configuration.RemoteClusters[uuid_prefix].andand.Insecure ||
+                       Rails.configuration.RemoteClusters['*'].andand.Insecure)
       clnt.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
     else
       # Use system CA certificates
@@ -167,7 +169,7 @@ class ApiClientAuthorization < ArvadosModel
       # by a remote cluster when the token absent or expired in our
       # database.  To begin, we need to ask the cluster that issued
       # the token to [re]validate it.
-      clnt = ApiClientAuthorization.make_http_client
+      clnt = ApiClientAuthorization.make_http_client(uuid_prefix: token_uuid_prefix)
 
       host = remote_host(uuid_prefix: token_uuid_prefix)
       if !host
index 60f2632029dc4bf2cf2ead9dc19522551b8bbb10..f76f5e47a5c68c7cb338a1e2bc836d80bf05a314 100644 (file)
@@ -7,6 +7,7 @@ class KeepService < ArvadosModel
   include KindAndEtag
   include CommonApiTemplate
   extend DbCurrentTime
+  extend CurrentApiClient
 
   SERVER_START_TIME = db_current_time
 
index e6a0795402b36415cc6bc4019a6b760fd4396435..46f2de6ee44f6dab98b315e66ede50296d1b4b84 100644 (file)
@@ -92,7 +92,7 @@ class Repository < ArvadosModel
     end
     if not (/^#{prefix_match}[A-Za-z][A-Za-z0-9]*$/.match(name))
       errors.add(:name,
-                 "#{errmsg_start} a letter followed by alphanumerics")
+                 "#{errmsg_start} a letter followed by alphanumerics, expected pattern '#{prefix_match}[A-Za-z][A-Za-z0-9]*' but was '#{name}'")
       false
     end
   end
index 4493f038cd1c03e5e265d973ed774e7223eb43e4..564274bc99f5e641b3fe1d0df4ba00967d8747af 100644 (file)
@@ -272,45 +272,87 @@ class User < ArvadosModel
     end
   end
 
-  # Move this user's (i.e., self's) owned items into new_owner_uuid.
-  # Also redirect future uses of this account to
-  # redirect_to_user_uuid, i.e., when a caller authenticates to this
-  # account in the future, the account redirect_to_user_uuid account
-  # will be used instead.
+  # Move this user's (i.e., self's) owned items to new_owner_uuid and
+  # new_user_uuid (for things normally owned directly by the user).
+  #
+  # If redirect_auth is true, also reassign auth tokens and ssh keys,
+  # and redirect this account to redirect_to_user_uuid, i.e., when a
+  # caller authenticates to this account in the future, the account
+  # redirect_to_user_uuid account will be used instead.
   #
   # current_user must have admin privileges, i.e., the caller is
   # responsible for checking permission to do this.
-  def merge(new_owner_uuid:, redirect_to_user_uuid:)
+  def merge(new_owner_uuid:, new_user_uuid:, redirect_to_new_user:)
     raise PermissionDeniedError if !current_user.andand.is_admin
-    raise "not implemented" if !redirect_to_user_uuid
+    raise "Missing new_owner_uuid" if !new_owner_uuid
+    raise "Missing new_user_uuid" if !new_user_uuid
     transaction(requires_new: true) do
       reload
       raise "cannot merge an already merged user" if self.redirect_to_user_uuid
 
-      new_user = User.where(uuid: redirect_to_user_uuid).first
+      new_user = User.where(uuid: new_user_uuid).first
       raise "user does not exist" if !new_user
       raise "cannot merge to an already merged user" if new_user.redirect_to_user_uuid
 
-      # Existing API tokens are updated to authenticate to the new
-      # user.
-      ApiClientAuthorization.
-        where(user_id: id).
-        update_all(user_id: new_user.id)
+      # If 'self' is a remote user, don't transfer authorizations
+      # (i.e. ability to access the account) to the new user, because
+      # that gives the remote site the ability to access the 'new'
+      # user account that takes over the 'self' account.
+      #
+      # If 'self' is a local user, it is okay to transfer
+      # authorizations, even if the 'new' user is a remote account,
+      # because the remote site does not gain the ability to access an
+      # account it could not before.
+
+      if redirect_to_new_user and self.uuid[0..4] == Rails.configuration.ClusterID
+        # Existing API tokens and ssh keys are updated to authenticate
+        # to the new user.
+        ApiClientAuthorization.
+          where(user_id: id).
+          update_all(user_id: new_user.id)
+
+        user_updates = [
+          [AuthorizedKey, :owner_uuid],
+          [AuthorizedKey, :authorized_user_uuid],
+          [Link, :owner_uuid],
+          [Link, :tail_uuid],
+          [Link, :head_uuid],
+        ]
+      else
+        # Destroy API tokens and ssh keys associated with the old
+        # user.
+        ApiClientAuthorization.where(user_id: id).destroy_all
+        AuthorizedKey.where(owner_uuid: uuid).destroy_all
+        AuthorizedKey.where(authorized_user_uuid: uuid).destroy_all
+        user_updates = [
+          [Link, :owner_uuid],
+          [Link, :tail_uuid]
+        ]
+      end
 
       # References to the old user UUID in the context of a user ID
       # (rather than a "home project" in the project hierarchy) are
       # updated to point to the new user.
-      [
-        [AuthorizedKey, :owner_uuid],
-        [AuthorizedKey, :authorized_user_uuid],
-        [Repository, :owner_uuid],
-        [Link, :owner_uuid],
-        [Link, :tail_uuid],
-        [Link, :head_uuid],
-      ].each do |klass, column|
+      user_updates.each do |klass, column|
         klass.where(column => uuid).update_all(column => new_user.uuid)
       end
 
+      # Need to update repository names to new username
+      if username
+        old_repo_name_re = /^#{Regexp.escape(username)}\//
+        Repository.where(:owner_uuid => uuid).each do |repo|
+          repo.owner_uuid = new_user.uuid
+          repo_name_sub = "#{new_user.username}/"
+          name = repo.name.sub(old_repo_name_re, repo_name_sub)
+          while (conflict = Repository.where(:name => name).first) != nil
+            repo_name_sub += "migrated"
+            name = repo.name.sub(old_repo_name_re, repo_name_sub)
+          end
+          repo.name = name
+          repo.save!
+        end
+      end
+
       # References to the merged user's "home project" are updated to
       # point to new_owner_uuid.
       ActiveRecord::Base.descendants.reject(&:abstract_class?).each do |klass|
@@ -323,7 +365,9 @@ class User < ArvadosModel
         klass.where(owner_uuid: uuid).update_all(owner_uuid: new_owner_uuid)
       end
 
-      update_attributes!(redirect_to_user_uuid: new_user.uuid)
+      if redirect_to_new_user
+        update_attributes!(redirect_to_user_uuid: new_user.uuid, username: nil)
+      end
       invalidate_permissions_cache
     end
   end
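
The repository rename-on-conflict rule used in merge() can be read as a small
pure function; a rough standalone sketch (hypothetical helper, not part of
the model):

    # Replace the old "<username>/" prefix with the new user's, and keep
    # prepending "migrated" to the remainder until the result no longer
    # collides with an existing repository name.
    def migrated_repo_name(old_name, old_username, new_username, taken_names)
      old_prefix_re = /^#{Regexp.escape(old_username)}\//
      new_prefix = "#{new_username}/"
      name = old_name.sub(old_prefix_re, new_prefix)
      while taken_names.include?(name)
        new_prefix += "migrated"
        name = old_name.sub(old_prefix_re, new_prefix)
      end
      name
    end

    migrated_repo_name("viewer/foo", "viewer", "active", ["active/foo"])
    # => "active/migratedfoo"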
index a2e6df8b580c0fc8c80f7a97188699870352f754..dc9ed461dd95831a1f67318f2eeb47c9c63d0e21 100644 (file)
@@ -4,7 +4,12 @@
 
 require 'current_api_client'
 
-include CurrentApiClient
+# Extend a helper class with CurrentApiClient instead of including it at the
+# top level, so that its methods are not mixed into Object as global methods;
+# that would be a problem because they would only be imported in the test
+# environment. See #15716 for more info.
+class CurrentApiClientHelper
+  extend CurrentApiClient
+end
 
 def has_symbols? x
   if x.is_a? Hash
@@ -83,7 +88,7 @@ namespace :symbols do
      Node, PipelineInstance, PipelineTemplate,
      Repository, Specimen, Trait, User, VirtualMachine,
      Workflow].each do |klass|
-      act_as_system_user do
+      CurrentApiClientHelper.act_as_system_user do
         klass.all.each do |c|
           check_for_serialized_symbols c
         end
@@ -99,7 +104,7 @@ namespace :symbols do
      Node, PipelineInstance, PipelineTemplate,
      Repository, Specimen, Trait, User, VirtualMachine,
      Workflow].each do |klass|
-      act_as_system_user do
+      CurrentApiClientHelper.act_as_system_user do
         klass.all.each do |c|
           stringify_serialized_symbols c
         end
index 60696b98a9c998be7e270fe8bd3fea8cc72bd450..d5db1039645cbadffc45d93317cc87664b889b38 100644 (file)
@@ -817,14 +817,46 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     end
   end
 
-  test "refuse to merge with redirect_to_user_uuid=false (not yet supported)" do
+  test "merge with redirect_to_user_uuid=false" do
     authorize_with :project_viewer_trustedclient
+    tok = api_client_authorizations(:project_viewer).api_token
     post :merge, params: {
            new_user_token: api_client_authorizations(:active_trustedclient).api_token,
            new_owner_uuid: users(:active).uuid,
            redirect_to_new_user: false,
          }
-    assert_response(422)
+    assert_response(:success)
+    assert_nil(User.unscoped.find_by_uuid(users(:project_viewer).uuid).redirect_to_user_uuid)
+
+    # because redirect_to_new_user=false, token owned by
+    # project_viewer should be deleted
+    auth = ApiClientAuthorization.validate(token: tok)
+    assert_nil(auth)
+  end
+
+  test "merge remote to local as admin" do
+    authorize_with :admin
+
+    remoteuser = User.create!(uuid: "zbbbb-tpzed-remotremotremot")
+    tok = ApiClientAuthorization.create!(user: remoteuser, api_client: api_clients(:untrusted)).api_token
+
+    auth = ApiClientAuthorization.validate(token: tok)
+    assert_not_nil(auth)
+    assert_nil(remoteuser.redirect_to_user_uuid)
+
+    post :merge, params: {
+           new_user_uuid: users(:active).uuid,
+           old_user_uuid: remoteuser.uuid,
+           new_owner_uuid: users(:active).uuid,
+           redirect_to_new_user: true,
+         }
+    assert_response(:success)
+    remoteuser.reload
+    assert_equal(users(:active).uuid, remoteuser.redirect_to_user_uuid)
+
+    # token owned by remoteuser should be deleted
+    auth = ApiClientAuthorization.validate(token: tok)
+    assert_nil(auth)
   end
 
   test "refuse to merge user into self" do
index 6b74154073d5edce800efaeeb7c666b1180af4b5..11ebb3f4fd7c96c61f0aae2be6c968b973364c87 100644 (file)
@@ -268,6 +268,7 @@ class UsersTest < ActionDispatch::IntegrationTest
       headers: auth(:active))
     assert_response(:success)
     assert_equal(users(:project_viewer).uuid, json_response['owner_uuid'])
+    assert_equal("#{users(:project_viewer).username}/foo", json_response['name'])
 
     get('/arvados/v1/groups/' + groups(:aproject).uuid,
       params: {},
@@ -303,4 +304,39 @@ class UsersTest < ActionDispatch::IntegrationTest
     assert_equal 'barney', json_response['username']
   end
 
+  test 'merge with repository name conflict' do
+    post('/arvados/v1/groups',
+      params: {
+        group: {
+          group_class: 'project',
+          name: "active user's stuff",
+        },
+      },
+      headers: auth(:project_viewer))
+    assert_response(:success)
+    project_uuid = json_response['uuid']
+
+    post('/arvados/v1/repositories/',
+         params: { :repository => { :name => "#{users(:project_viewer).username}/foo", :owner_uuid => users(:project_viewer).uuid } },
+         headers: auth(:project_viewer))
+    assert_response(:success)
+
+    post('/arvados/v1/users/merge',
+      params: {
+        new_user_token: api_client_authorizations(:project_viewer_trustedclient).api_token,
+        new_owner_uuid: project_uuid,
+        redirect_to_new_user: true,
+      },
+      headers: auth(:active_trustedclient))
+    assert_response(:success)
+
+    get('/arvados/v1/repositories/' + repositories(:foo).uuid,
+      params: {},
+      headers: auth(:active))
+    assert_response(:success)
+    assert_equal(users(:project_viewer).uuid, json_response['owner_uuid'])
+    assert_equal("#{users(:project_viewer).username}/migratedfoo", json_response['name'])
+
+  end
+
 end
index 728c6fded1ac6f372f30b7a4da78da7c6a2f7199..2eba5efbfd242d46109b0f8dfa0092a59c618d62 100644 (file)
@@ -14,6 +14,11 @@ StartLimitInterval=0
 StartLimitIntervalSec=0
 
 [Service]
+# Trigger Go garbage collection when the ratio of freshly allocated data to live
+# data remaining after the previous collection reaches 10% (rather than the default
+# 100%), so that Keepstore's memory use stays tightly coupled to the number of
+# buffers it is configured to use.
+Environment=GOGC=10
 Type=notify
 ExecStart=/usr/bin/keepstore
 Restart=always
index f474a6b23be33fdd58135396c1ca66e79611232e..246229decf0bc7d9bf717a8673019c94c33ed430 100755 (executable)
@@ -19,11 +19,13 @@ if ! which docker >/dev/null 2>/dev/null ; then
 fi
 
 if test -z "$ARVBOX_DOCKER" ; then
+    set +e
     if which greadlink >/dev/null 2>/dev/null ; then
         ARVBOX_DOCKER=$(greadlink -f $(dirname $0)/../lib/arvbox/docker)
     else
         ARVBOX_DOCKER=$(readlink -f $(dirname $0)/../lib/arvbox/docker)
     fi
+    set -e
 fi
 
 if test -z "$ARVBOX_CONTAINER" ; then
@@ -596,6 +598,39 @@ case "$subcmd" in
        exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados'
        ;;
 
+    checkpoint)
+       exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec pg_dump --host=localhost --username=arvados --clean arvados_development > /var/lib/arvados/checkpoint.sql'
+       ;;
+
+    restore)
+       exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados --quiet --file=/var/lib/arvados/checkpoint.sql'
+       ;;
+
+    hotreset)
+       exec docker exec -i $ARVBOX_CONTAINER /usr/bin/env GEM_HOME=/var/lib/gems /bin/bash - <<EOF
+sv stop api
+sv stop controller
+sv stop websockets
+sv stop keepstore0
+sv stop keepstore1
+sv stop keepproxy
+cd /usr/src/arvados/services/api
+export RAILS_ENV=development
+bundle exec rake db:drop
+rm /var/lib/arvados/api_database_setup
+rm /var/lib/arvados/superuser_token
+rm /var/lib/arvados/keep0-uuid
+rm /var/lib/arvados/keep1-uuid
+rm /var/lib/arvados/keepproxy-uuid
+sv start api
+sv start controller
+sv start websockets
+sv restart keepstore0
+sv restart keepstore1
+sv restart keepproxy
+EOF
+       ;;
+
     *)
         echo "Arvados-in-a-box             https://doc.arvados.org/install/arvbox.html"
         echo
@@ -614,6 +649,9 @@ case "$subcmd" in
         echo "build   <config>   build arvbox Docker image"
         echo "reboot  <config>   stop, build arvbox Docker image, run"
         echo "rebuild <config>   build arvbox Docker image, no layer cache"
+       echo "checkpoint         create database backup"
+       echo "restore            restore checkpoint"
+       echo "hotreset           reset database and restart API without restarting container"
         echo "reset              delete arvbox arvados data (be careful!)"
         echo "destroy            delete all arvbox code and data (be careful!)"
         echo "log <service>      tail log of specified service"