Merge branch 'keep_web_api_client_upload_method' refs #19220
authorPeter Amstutz <peter.amstutz@curii.com>
Thu, 7 Jul 2022 14:57:59 +0000 (10:57 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Thu, 7 Jul 2022 14:57:59 +0000 (10:57 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

47 files changed:
apps/workbench/app/views/users/_show_admin.html.erb
build/run-build-packages-one-target.sh
build/run-build-packages.sh
doc/_config.yml
doc/admin/config-urls.html.textile.liquid
doc/api/keep-s3.html.textile.liquid
doc/user/topics/arvados-sync-external-sources.html.textile.liquid [moved from doc/user/topics/arvados-sync-groups.html.textile.liquid with 51% similarity]
lib/config/cmd_test.go
lib/config/config.default.yml
lib/controller/handler.go
lib/controller/handler_test.go
lib/controller/integration_test.go
lib/service/cmd.go
lib/service/cmd_test.go
sdk/go/arvados/config.go
sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_deferred.go
sdk/go/arvados/fs_project.go
sdk/go/arvados/fs_site.go
sdk/go/arvados/fs_users.go
sdk/go/httpserver/inspect.go [new file with mode: 0644]
sdk/go/httpserver/inspect_test.go [new file with mode: 0644]
sdk/go/httpserver/logger.go
sdk/go/httpserver/request_limiter_test.go
services/api/app/controllers/database_controller.rb
services/api/test/fixtures/users.yml
services/keep-web/s3.go
services/keep-web/s3_test.go
services/login-sync/bin/arvados-login-sync
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/states/host_entries.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/single_hostname/states/host_entries.sls
tools/salt-install/installer.sh [new file with mode: 0755]
tools/salt-install/local.params.example.multiple_hosts
tools/salt-install/local.params.example.single_host_multiple_hostnames
tools/salt-install/local.params.example.single_host_single_hostname
tools/salt-install/provision.sh
tools/sync-users/.gitignore [new file with mode: 0644]
tools/sync-users/sync-users.go [new file with mode: 0644]
tools/sync-users/sync-users_test.go [new file with mode: 0644]

index 1da22d438fabe1609cf09857d17ec0b6bd3c9a52..b151ceff042567e6020bdcd2a827203772be8b6d 100644 (file)
@@ -6,7 +6,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
   <div class="col-md-6">
 
     <p>
-      This page enables you to <a href="https://doc.arvados.org/master/admin/user-management.html">manage users</a>.
+      This page enables you to <a href="https://doc.arvados.org/main/admin/user-management.html">manage users</a>.
     </p>
 
     <p>
@@ -22,7 +22,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
       As an admin, you can deactivate and reset this user. This will
       remove all repository/VM permissions for the user. If you
       "setup" the user again, the user will have to sign the user
-      agreement again.  You may also want to <a href="https://doc.arvados.org/master/admin/reassign-ownership.html">reassign data ownership</a>.
+      agreement again.  You may also want to <a href="https://doc.arvados.org/main/admin/reassign-ownership.html">reassign data ownership</a>.
     </p>
 
     <%= button_to "Deactivate #{@object.full_name}", unsetup_user_url(id: @object.uuid), class: 'btn btn-primary', data: {confirm: "Are you sure you want to deactivate #{@object.full_name}?"} %>
index 41b480e697b74c008add6ea1020716db12f29c6f..7d9b5b6a37abb14185a693ff331859137d7f4082 100755 (executable)
@@ -215,6 +215,7 @@ if test -z "$packages" ; then
         arvados-server
         arvados-src
         arvados-sync-groups
+        arvados-sync-users
         arvados-workbench
         arvados-workbench2
         arvados-ws
index 3e1ed6a94de866c9feda7861b08318af8ff61b9d..d4240d4f26b9120c3477aff6460a66aa3c169955 100755 (executable)
@@ -268,6 +268,8 @@ package_go_binary cmd/arvados-server arvados-ws "$FORMAT" "$ARCH" \
     "Arvados Websocket server"
 package_go_binary tools/sync-groups arvados-sync-groups "$FORMAT" "$ARCH" \
     "Synchronize remote groups into Arvados from an external source"
+package_go_binary tools/sync-users arvados-sync-users "$FORMAT" "$ARCH" \
+    "Synchronize remote users into Arvados from an external source"
 package_go_binary tools/keep-block-check keep-block-check "$FORMAT" "$ARCH" \
     "Verify that all data from one set of Keep servers to another was copied"
 package_go_binary tools/keep-rsync keep-rsync "$FORMAT" "$ARCH" \
index 7c5e6d986e49fbc3aa8f42b8ffd6945c186fc94b..d2bb7e797582a8c2a98c850face5442b9e07bfdb 100644 (file)
@@ -177,7 +177,7 @@ navbar:
       - admin/federation.html.textile.liquid
       - admin/merge-remote-account.html.textile.liquid
       - admin/migrating-providers.html.textile.liquid
-      - user/topics/arvados-sync-groups.html.textile.liquid
+      - user/topics/arvados-sync-external-sources.html.textile.liquid
       - admin/scoped-tokens.html.textile.liquid
       - admin/token-expiration-policy.html.textile.liquid
       - admin/user-activity.html.textile.liquid
index e518ea1bf7c11c8791e0aff4c6c220a34c39735f..01c30f0e0eb88eecabc0269cabd40c3aabb07892 100644 (file)
@@ -16,9 +16,9 @@ The @Services@ section lists a number of Arvados services, each with an @Interna
 
 The @ExternalURL@ is the address where the service should be reachable by clients, both from inside and from outside the Arvados cluster. Some services do not expose an Arvados API, only Prometheus metrics. In that case, @ExternalURL@ is not used.
 
-The keys under @InternalURLs@ are addresses that are used by the reverse proxy (e.g. Nginx) that fronts Arvados services. The exception is the @Keepstore@ service, where clients connect directly to the addresses listed under @InternalURLs@. If a service is not fronted by a reverse proxy, e.g. when its endpoint only exposes Prometheus metrics, the intention is that metrics are collected directly from the endpoints defined in @InternalURLs@.
+The keys under @InternalURLs@ are the URLs through which Arvados system components can connect to one another, including the reverse proxy (e.g. Nginx) that fronts Arvados services. The exception is the @Keepstore@ service, where clients on the local network connect directly to @Keepstore.InternalURLs@ (while clients from outside networks connect to @Keepproxy.ExternalURL@). If a service is not fronted by a reverse proxy, e.g. when its endpoint only exposes Prometheus metrics, the intention is that metrics are collected directly from the endpoints defined in @InternalURLs@.
 
-@InternalURLs@ are also used by the service itself to figure out which address/port to listen on.
+Each entry in the @InternalURLs@ section may also indicate a @ListenURL@ to determine the protocol, address/interface, and port where the service process will listen, in case the desired listening address differs from the @InternalURLs@ key itself -- for example, when passing internal traffic through a reverse proxy.
 
 If the Arvados service lives behind a reverse proxy (e.g. Nginx), configuring the reverse proxy and the @InternalURLs@ and @ExternalURL@ values must be done in concert.
 
@@ -228,11 +228,12 @@ Consider this section for the @Controller@ service:
 {% codeblock as yaml %}
   Controller:
     InternalURLs:
-      "http://localhost:8003": {}
+      "https://ctrl-0.internal":
+        ListenURL: "http://localhost:8003"
     ExternalURL: "https://ClusterID.example.com"
 {% endcodeblock %}
 
-The @ExternalURL@ advertised is @https://ClusterID.example.com@. The @Controller@ service will start up on @localhost@ port 8003. Nginx is configured to sit in front of the @Controller@ service and terminates SSL:
+The @ExternalURL@ advertised to clients is @https://ClusterID.example.com@. The @arvados-controller@ process will listen on @localhost@ port 8003. Other Arvados service processes in the cluster can connect to this specific controller instance, using the URL @https://ctrl-0.internal@. Nginx is configured to sit in front of the @Controller@ service and terminate TLS:
 
 <notextile><pre><code>
 # This is the port where nginx expects to contact arvados-controller.
@@ -245,7 +246,7 @@ server {
   # the request is reverse proxied to the upstream 'controller'
 
   listen       443 ssl;
-  server_name  ClusterID.example.com;
+  server_name  ClusterID.example.com ctrl-0.internal;
 
   ssl_certificate     /YOUR/PATH/TO/cert.pem;
   ssl_certificate_key /YOUR/PATH/TO/cert.key;
@@ -275,4 +276,13 @@ server {
 }
 </code></pre></notextile>
 
+If the host part of @ListenURL@ is ambiguous, in the sense that more than one system host is able to listen on that address (e.g., @localhost@), configure each host's startup scripts to set the environment variable @ARVADOS_SERVICE_INTERNAL_URL@ to the @InternalURLs@ key that will reach that host. In the example above, this would be @ARVADOS_SERVICE_INTERNAL_URL=https://ctrl-0.internal@.
+
+If the cluster has just a single node running all of the Arvados server processes, configuration can be simplified:
 
+{% codeblock as yaml %}
+  Controller:
+    InternalURLs:
+      "http://localhost:8003": {}
+    ExternalURL: "https://ClusterID.example.com"
+{% endcodeblock %}
index bee91516bc12fc61e87a51b603361372ad64e358..1fa186e4e7c66bad3f8400a988bd982a69ff7c86 100644 (file)
@@ -70,6 +70,24 @@ h4. GetBucketVersioning
 
 Bucket versioning is presently not supported, so this will always respond that bucket versioning is not enabled.
 
+h3. Accessing collection/project properties as metadata
+
+GetObject, HeadObject, and HeadBucket return Arvados object properties as S3 metadata headers, e.g., @X-Amz-Meta-Foo: bar@.
+
+If the requested path indicates a file or directory placeholder inside a collection, or the top level of a collection, GetObject and HeadObject return the collection properties.
+
+If the requested path indicates a directory placeholder corresponding to a project, GetObject and HeadObject return the properties of the project.
+
+HeadBucket returns the properties of the collection or project corresponding to the bucket name.
+
+Non-string property values are returned in a JSON representation, e.g., @["foo","bar"]@.
+
+As in Amazon S3, property values containing non-ASCII characters are returned in BASE64-encoded form as described in RFC 2047, e.g., @=?UTF-8?b?4pu1?=@.
+
+GetBucketTagging and GetObjectTagging APIs are _not_ supported.
+
+It is not possible to modify collection or project properties using the S3 API.
+
 h3. Authorization mechanisms
 
 Keep-web accepts AWS Signature Version 4 (AWS4-HMAC-SHA256) as well as the older V2 AWS signature.
similarity index 51%
rename from doc/user/topics/arvados-sync-groups.html.textile.liquid
rename to doc/user/topics/arvados-sync-external-sources.html.textile.liquid
index 1f7eede4bb14650a862e1276cf1b8bccbc05e429..0ec0098f053aa0b4d53c5a133bc00c1ed2325f58 100644 (file)
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: admin
-title: "Synchronizing external groups"
+title: "Synchronizing from external sources"
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,7 +9,51 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-The @arvados-sync-groups@ tool allows to synchronize groups in Arvados from an external source.
+The @arvados-sync-users@ and @arvados-sync-groups@ tools allow to manage Arvados users & groups from external sources.
+
+These tools are designed to be run periodically reading a file created by a remote auth system (ie: LDAP) dump script, applying what's included on the file as the source of truth.
+
+bq. NOTE: Both tools need to perform several administrative tasks on Arvados, so must be run using a superuser token via @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables or @~/.config/arvados/settings.conf@ file.
+
+h1. Using arvados-sync-users
+
+This tool reads a CSV (comma-separated values) file having information about user accounts and their expected state on Arvados.
+
+Every line on the file should have 5 fields:
+
+# A user identifier: it could be an email address (default) or a username.
+# The user's first name.
+# The user's last name.
+# The intended user's active state.
+# The intended user's admin state: will always be read as @false@ when @active=false@.
+
+The last 2 fields should be represented as @true@/@false@, @yes@/@no@, or @1@/@0@ values.
+
+h2. Options
+
+The following command line options are supported:
+
+table(table table-bordered table-condensed).
+|_. Option |_. Description |
+|==--help==|This list of options|
+|==--case-insensitive==|Uses case-insensitive username matching|
+|==--deactivate-unlisted==|Deactivate users that aren't listed on the input file. (Current & system users won't be affected)|
+|==--user-id==|Identifier to use in looking up user. One of 'email' or 'username' (Default: 'email')|
+|==--verbose==|Log informational messages|
+|==--version==|Print version and exit|
+
+The tool will create users when needed, and update those existing records to match the desired state described by the fields on the CSV file.
+System users like the root and anonymous are unaffected by this tool.
+In the case of a @LoginCluster@ federation, this tool should be run on the cluster that manages the user accounts, and will fail otherwise.
+
+h2. Example
+
+To sync users using the username to identify every account, reading from some @external_users.csv@ file and deactivating existing users that aren't included in it, the command should be called as follows:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arvados-sync-users --deactivate-unlisted --user-id username /path/to/external_users.csv </span>
+</code></pre>
+</notextile>
 
 h1. Using arvados-sync-groups
 
@@ -21,11 +65,6 @@ Users can be identified by their email address or username: the tool will check
 
 Permission level can be one of the following: @can_read@, @can_write@ or @can_manage@, giving the group member read, read/write or managing privileges on the group. For backwards compatibility purposes, if any record omits the third (permission) field, it will default to @can_write@ permission. You can read more about permissions on the "group management admin guide":{{ site.baseurl }}/admin/group-management.html.
 
-This tool is designed to be run periodically reading a file created by a remote auth system (ie: LDAP) dump script, applying what's included on the file as the source of truth.
-
-
-bq. NOTE: @arvados-sync-groups@ needs to perform several administrative tasks on Arvados, so must be run using a superuser token
-
 h2. Options
 
 The following command line options are supported:
index 7167982ccd7021f3b43a895190909694c493b7da..9503a54d2d7c137ec5c2e805a3aaec9b990014ce 100644 (file)
@@ -217,7 +217,7 @@ Clusters:
        code := DumpCommand.RunCommand("arvados config-dump", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
        c.Check(code, check.Equals, 0)
        c.Check(stdout.String(), check.Matches, `(?ms).*TimeoutBooting: 10m\n.*`)
-       c.Check(stdout.String(), check.Matches, `(?ms).*http://localhost:12345/: {}\n.*`)
+       c.Check(stdout.String(), check.Matches, `(?ms).*http://localhost:12345/:\n +ListenURL: ""\n.*`)
 }
 
 func (s *CommandSuite) TestDump_UnknownKey(c *check.C) {
index a9bbf4eee9b5002e733cb46df5ffe9be995ffdcf..472a22c6b2cb11a3566d882e6420f52400ca4b13 100644 (file)
@@ -22,47 +22,78 @@ Clusters:
 
     Services:
 
-      # In each of the service sections below, the keys under
-      # InternalURLs are the endpoints where the service should be
-      # listening, and reachable from other hosts in the
-      # cluster. Example:
+      # Each of the service sections below specifies InternalURLs
+      # (each with optional ListenURL) and ExternalURL.
+      #
+      # InternalURLs specify how other Arvados service processes will
+      # connect to the service. Typically these use internal hostnames
+      # and high port numbers. Example:
+      #
+      # InternalURLs:
+      #   "http://host1.internal.example:12345": {}
+      #   "http://host2.internal.example:12345": {}
+      #
+      # ListenURL specifies the address and port the service process's
+      # HTTP server should listen on, if different from the
+      # InternalURL itself. Example, using an intermediate TLS proxy:
       #
       # InternalURLs:
-      #   "http://host1.example:12345": {}
-      #   "http://host2.example:12345": {}
+      #   "https://host1.internal.example":
+      #     ListenURL: "http://10.0.0.7:12345"
+      #
+      # When there are multiple InternalURLs configured, the service
+      # process will try listening on each InternalURLs (using
+      # ListenURL if provided) until one works. If you use a ListenURL
+      # like "0.0.0.0" which can be bound on any machine, use an
+      # environment variable
+      # ARVADOS_SERVICE_INTERNAL_URL=http://host1.internal.example to
+      # control which entry to use.
+      #
+      # ExternalURL specifies how applications/clients will connect to
+      # the service, regardless of whether they are inside or outside
+      # the cluster. Example:
+      #
+      # ExternalURL: "https://keep.zzzzz.example.com/"
+      #
+      # To avoid routing internal traffic through external networks,
+      # use split-horizon DNS for ExternalURL host names: inside the
+      # cluster's private network "host.zzzzz.example.com" resolves to
+      # the host's private IP address, while outside the cluster
+      # "host.zzzzz.example.com" resolves to the host's public IP
+      # address (or its external gateway or load balancer).
 
       RailsAPI:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       Controller:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       Websocket:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       Keepbalance:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       GitHTTP:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       GitSSH:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       DispatchCloud:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       DispatchLSF:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       DispatchSLURM:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       Keepproxy:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       WebDAV:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         # Base URL for Workbench inline preview.  If blank, use
         # WebDAVDownload instead, and disable inline preview.
         # If both are empty, downloading collections from workbench
@@ -101,7 +132,7 @@ Clusters:
         ExternalURL: ""
 
       WebDAVDownload:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         # Base URL for download links. If blank, serve links to WebDAV
         # with disposition=attachment query param.  Unlike preview links,
         # browsers do not render attachments, so there is no risk of XSS.
@@ -117,6 +148,7 @@ Clusters:
       Keepstore:
         InternalURLs:
           SAMPLE:
+            ListenURL: ""
             # Rendezvous is normally empty/omitted. When changing the
             # URL of a Keepstore service, Rendezvous should be set to
             # the old URL (with trailing slash omitted) to preserve
@@ -124,10 +156,10 @@ Clusters:
             Rendezvous: ""
         ExternalURL: ""
       Composer:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       WebShell:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         # ShellInABox service endpoint URL for a given VM.  If empty, do not
         # offer web shell logins.
         #
@@ -138,13 +170,13 @@ Clusters:
         # https://*.webshell.uuid_prefix.arvadosapi.com
         ExternalURL: ""
       Workbench1:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       Workbench2:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
       Health:
-        InternalURLs: {SAMPLE: {}}
+        InternalURLs: {SAMPLE: {ListenURL: ""}}
         ExternalURL: ""
 
     PostgreSQL:
index f5840b34ce72cd18da4d75c4d27dbb23920e53dd..665fd5c636372fc4a21bd7de68c5d886aafbcc7c 100644 (file)
@@ -13,7 +13,6 @@ import (
        "net/url"
        "strings"
        "sync"
-       "time"
 
        "git.arvados.org/arvados.git/lib/controller/api"
        "git.arvados.org/arvados.git/lib/controller/federation"
@@ -61,12 +60,6 @@ func (h *Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) {
                        req.URL.Path = strings.Replace(req.URL.Path, "//", "/", -1)
                }
        }
-       if h.Cluster.API.RequestTimeout > 0 {
-               ctx, cancel := context.WithDeadline(req.Context(), time.Now().Add(time.Duration(h.Cluster.API.RequestTimeout)))
-               req = req.WithContext(ctx)
-               defer cancel()
-       }
-
        h.handlerStack.ServeHTTP(w, req)
 }
 
index 5e467cb0588607d3deaa06c1d92326ed18f8f09c..39c2b1c68e5c82921e10bc9125d54e17846a8fed 100644 (file)
@@ -204,17 +204,21 @@ func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
        c.Check(len(dd.Schemas), check.Not(check.Equals), 0)
 }
 
-func (s *HandlerSuite) TestRequestTimeout(c *check.C) {
-       s.cluster.API.RequestTimeout = arvados.Duration(time.Nanosecond)
-       req := httptest.NewRequest("GET", "/discovery/v1/apis/arvados/v1/rest", nil)
+// Handler should give up and exit early if request context is
+// cancelled due to client hangup, httpserver.HandlerWithDeadline,
+// etc.
+func (s *HandlerSuite) TestRequestCancel(c *check.C) {
+       ctx, cancel := context.WithCancel(context.Background())
+       req := httptest.NewRequest("GET", "/discovery/v1/apis/arvados/v1/rest", nil).WithContext(ctx)
        resp := httptest.NewRecorder()
+       cancel()
        s.handler.ServeHTTP(resp, req)
        c.Check(resp.Code, check.Equals, http.StatusBadGateway)
        var jresp httpserver.ErrorResponse
        err := json.Unmarshal(resp.Body.Bytes(), &jresp)
        c.Check(err, check.IsNil)
        c.Assert(len(jresp.Errors), check.Equals, 1)
-       c.Check(jresp.Errors[0], check.Matches, `.*context deadline exceeded.*`)
+       c.Check(jresp.Errors[0], check.Matches, `.*context canceled`)
 }
 
 func (s *HandlerSuite) TestProxyWithoutToken(c *check.C) {
index 67d60197e7e160b5f7d99e510106a55a18ffd07f..b0ec4293a38acfdf6a349db48fff96c7bf3f7a1a 100644 (file)
@@ -1126,7 +1126,7 @@ func (s *IntegrationSuite) TestForwardRuntimeTokenToLoginCluster(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestRunTrivialContainer(c *check.C) {
-       outcoll := s.runContainer(c, "z1111", map[string]interface{}{
+       outcoll, _ := s.runContainer(c, "z1111", "", map[string]interface{}{
                "command":             []string{"sh", "-c", "touch \"/out/hello world\" /out/ohai"},
                "container_image":     "busybox:uclibc",
                "cwd":                 "/tmp",
@@ -1141,10 +1141,49 @@ func (s *IntegrationSuite) TestRunTrivialContainer(c *check.C) {
        c.Check(outcoll.PortableDataHash, check.Equals, "8fa5dee9231a724d7cf377c5a2f4907c+65")
 }
 
-func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec map[string]interface{}, expectExitCode int) arvados.Collection {
+func (s *IntegrationSuite) TestContainerInputOnDifferentCluster(c *check.C) {
+       conn := s.super.Conn("z1111")
+       rootctx, _, _ := s.super.RootClients("z1111")
+       userctx, ac, _, _ := s.super.UserClients("z1111", rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+       z1coll, err := conn.CollectionCreate(userctx, arvados.CreateOptions{Attrs: map[string]interface{}{
+               "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:ocelot\n",
+       }})
+       c.Assert(err, check.IsNil)
+
+       outcoll, logcfs := s.runContainer(c, "z2222", ac.AuthToken, map[string]interface{}{
+               "command":         []string{"ls", "/in"},
+               "container_image": "busybox:uclibc",
+               "cwd":             "/tmp",
+               "environment":     map[string]string{},
+               "mounts": map[string]arvados.Mount{
+                       "/in":  {Kind: "collection", PortableDataHash: z1coll.PortableDataHash},
+                       "/out": {Kind: "tmp", Capacity: 10000},
+               },
+               "output_path":         "/out",
+               "runtime_constraints": arvados.RuntimeConstraints{RAM: 100000000, VCPUs: 1},
+               "priority":            1,
+               "state":               arvados.ContainerRequestStateCommitted,
+               "container_count_max": 1,
+       }, -1)
+       if outcoll.UUID == "" {
+               arvmountlog, err := fs.ReadFile(arvados.FS(logcfs), "/arv-mount.txt")
+               c.Check(err, check.IsNil)
+               c.Check(string(arvmountlog), check.Matches, `(?ms).*cannot use a locally issued token to forward a request to our login cluster \(z1111\).*`)
+               c.Skip("this use case is not supported yet")
+       }
+       stdout, err := fs.ReadFile(arvados.FS(logcfs), "/stdout.txt")
+       c.Check(err, check.IsNil)
+       c.Check(string(stdout), check.Equals, "ocelot\n")
+}
+
+func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token string, ctrSpec map[string]interface{}, expectExitCode int) (outcoll arvados.Collection, logcfs arvados.CollectionFileSystem) {
        conn := s.super.Conn(clusterID)
        rootctx, _, _ := s.super.RootClients(clusterID)
-       _, ac, kc, _ := s.super.UserClients(clusterID, rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+       if token == "" {
+               _, ac, _, _ := s.super.UserClients(clusterID, rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+               token = ac.AuthToken
+       }
+       _, ac, kc := s.super.ClientsWithToken(clusterID, token)
 
        c.Log("[docker load]")
        out, err := exec.Command("docker", "load", "--input", arvadostest.BusyboxDockerImage(c)).CombinedOutput()
@@ -1164,7 +1203,7 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec ma
        })
        c.Assert(err, check.IsNil)
 
-       showlogs := func(collectionID string) {
+       showlogs := func(collectionID string) arvados.CollectionFileSystem {
                var logcoll arvados.Collection
                err = ac.RequestAndDecode(&logcoll, "GET", "/arvados/v1/collections/"+collectionID, nil, nil)
                c.Assert(err, check.IsNil)
@@ -1182,42 +1221,36 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec ma
                        c.Logf("=== %s\n%s\n", path, buf)
                        return nil
                })
+               return cfs
        }
 
        var ctr arvados.Container
        var lastState arvados.ContainerState
        deadline := time.Now().Add(time.Minute)
-wait:
-       for ; ; lastState = ctr.State {
-               err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
+       for cr.State != arvados.ContainerRequestStateFinal {
+               err = ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
                c.Assert(err, check.IsNil)
-               switch ctr.State {
-               case lastState:
+               err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
+               if err != nil {
+                       c.Logf("error getting container state: %s", err)
+               } else if ctr.State != lastState {
+                       c.Logf("container state changed to %q", ctr.State)
+                       lastState = ctr.State
+               } else {
                        if time.Now().After(deadline) {
-                               c.Errorf("timed out, container request state is %q", cr.State)
+                               c.Errorf("timed out, container state is %q", cr.State)
                                showlogs(ctr.Log)
                                c.FailNow()
                        }
                        time.Sleep(time.Second / 2)
-               case arvados.ContainerStateComplete:
-                       break wait
-               case arvados.ContainerStateQueued, arvados.ContainerStateLocked, arvados.ContainerStateRunning:
-                       c.Logf("container state changed to %q", ctr.State)
-               default:
-                       c.Errorf("unexpected container state %q", ctr.State)
-                       showlogs(ctr.Log)
-                       c.FailNow()
                }
        }
-       c.Check(ctr.ExitCode, check.Equals, 0)
-
-       err = ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
-       c.Assert(err, check.IsNil)
-
-       showlogs(cr.LogUUID)
-
-       var outcoll arvados.Collection
-       err = ac.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
-       c.Assert(err, check.IsNil)
-       return outcoll
+       if expectExitCode >= 0 {
+               c.Check(ctr.State, check.Equals, arvados.ContainerStateComplete)
+               c.Check(ctr.ExitCode, check.Equals, expectExitCode)
+               err = ac.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
+               c.Assert(err, check.IsNil)
+       }
+       logcfs = showlogs(cr.LogUUID)
+       return outcoll, logcfs
 }
index 679cbede13bc8cf141a34ec40373a458c5195451..4b640c4e4773225ccb0e9312bc18a436552e9cfb 100644 (file)
@@ -121,11 +121,11 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
        })
        ctx := ctxlog.Context(c.ctx, logger)
 
-       listenURL, err := getListenAddr(cluster.Services, c.svcName, log)
+       listenURL, internalURL, err := getListenAddr(cluster.Services, c.svcName, log)
        if err != nil {
                return 1
        }
-       ctx = context.WithValue(ctx, contextKeyURL{}, listenURL)
+       ctx = context.WithValue(ctx, contextKeyURL{}, internalURL)
 
        reg := prometheus.NewRegistry()
        loader.RegisterMetrics(reg)
@@ -147,9 +147,10 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
        instrumented := httpserver.Instrument(reg, log,
                httpserver.HandlerWithDeadline(cluster.API.RequestTimeout.Duration(),
                        httpserver.AddRequestIDs(
-                               httpserver.LogRequests(
-                                       interceptHealthReqs(cluster.ManagementToken, handler.CheckHealth,
-                                               httpserver.NewRequestLimiter(cluster.API.MaxConcurrentRequests, handler, reg))))))
+                               httpserver.Inspect(reg, cluster.ManagementToken,
+                                       httpserver.LogRequests(
+                                               interceptHealthReqs(cluster.ManagementToken, handler.CheckHealth,
+                                                       httpserver.NewRequestLimiter(cluster.API.MaxConcurrentRequests, handler, reg)))))))
        srv := &httpserver.Server{
                Server: http.Server{
                        Handler:     ifCollectionInHost(instrumented, instrumented.ServeAPI(cluster.ManagementToken, instrumented)),
@@ -157,7 +158,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                },
                Addr: listenURL.Host,
        }
-       if listenURL.Scheme == "https" {
+       if listenURL.Scheme == "https" || listenURL.Scheme == "wss" {
                tlsconfig, err := tlsConfigWithCertUpdater(cluster, logger)
                if err != nil {
                        logger.WithError(err).Errorf("cannot start %s service on %s", c.svcName, listenURL.String())
@@ -223,28 +224,72 @@ func interceptHealthReqs(mgtToken string, checkHealth func() error, next http.Ha
        return ifCollectionInHost(next, mux)
 }
 
-func getListenAddr(svcs arvados.Services, prog arvados.ServiceName, log logrus.FieldLogger) (arvados.URL, error) {
+// Determine listenURL (addr:port where server should bind) and
+// internalURL (target url that client should connect to) for a
+// service.
+//
+// If the config does not specify ListenURL, we check all of the
+// configured InternalURLs. If there is exactly one that matches our
+// hostname, or exactly one that matches a local interface address,
+// then we use that as listenURL.
+//
+// Note that listenURL and internalURL may use different protocols
+// (e.g., listenURL is http, but the service sits behind a proxy, so
+// clients connect using https).
+func getListenAddr(svcs arvados.Services, prog arvados.ServiceName, log logrus.FieldLogger) (arvados.URL, arvados.URL, error) {
        svc, ok := svcs.Map()[prog]
        if !ok {
-               return arvados.URL{}, fmt.Errorf("unknown service name %q", prog)
+               return arvados.URL{}, arvados.URL{}, fmt.Errorf("unknown service name %q", prog)
        }
 
-       if want := os.Getenv("ARVADOS_SERVICE_INTERNAL_URL"); want == "" {
-       } else if url, err := url.Parse(want); err != nil {
-               return arvados.URL{}, fmt.Errorf("$ARVADOS_SERVICE_INTERNAL_URL (%q): %s", want, err)
-       } else {
+       if want := os.Getenv("ARVADOS_SERVICE_INTERNAL_URL"); want != "" {
+               url, err := url.Parse(want)
+               if err != nil {
+                       return arvados.URL{}, arvados.URL{}, fmt.Errorf("$ARVADOS_SERVICE_INTERNAL_URL (%q): %s", want, err)
+               }
                if url.Path == "" {
                        url.Path = "/"
                }
-               return arvados.URL(*url), nil
+               for internalURL, conf := range svc.InternalURLs {
+                       if internalURL.String() == url.String() {
+                               listenURL := conf.ListenURL
+                               if listenURL.Host == "" {
+                                       listenURL = internalURL
+                               }
+                               return listenURL, internalURL, nil
+                       }
+               }
+               log.Warnf("possible configuration error: listening on %s (from $ARVADOS_SERVICE_INTERNAL_URL) even though configuration does not have a matching InternalURLs entry", url)
+               internalURL := arvados.URL(*url)
+               return internalURL, internalURL, nil
        }
 
        errors := []string{}
-       for url := range svc.InternalURLs {
-               listener, err := net.Listen("tcp", url.Host)
+       for internalURL, conf := range svc.InternalURLs {
+               listenURL := conf.ListenURL
+               if listenURL.Host == "" {
+                       // If ListenURL is not specified, assume
+                       // InternalURL is also usable as the listening
+                       // proto/addr/port (i.e., simple case with no
+                       // intermediate proxy/routing)
+                       listenURL = internalURL
+               }
+               listenAddr := listenURL.Host
+               if _, _, err := net.SplitHostPort(listenAddr); err != nil {
+                       // url "https://foo.example/" (with no
+                       // explicit port name/number) means listen on
+                       // the well-known port for the specified
+                       // protocol, "foo.example:https".
+                       port := listenURL.Scheme
+                       if port == "ws" || port == "wss" {
+                               port = "http" + port[2:]
+                       }
+                       listenAddr = net.JoinHostPort(listenAddr, port)
+               }
+               listener, err := net.Listen("tcp", listenAddr)
                if err == nil {
                        listener.Close()
-                       return url, nil
+                       return listenURL, internalURL, nil
                } else if strings.Contains(err.Error(), "cannot assign requested address") {
                        // If 'Host' specifies a different server than
                        // the current one, it'll resolve the hostname
@@ -252,13 +297,13 @@ func getListenAddr(svcs arvados.Services, prog arvados.ServiceName, log logrus.F
                        // can't bind an IP address it doesn't own.
                        continue
                } else {
-                       errors = append(errors, fmt.Sprintf("tried %v, got %v", url, err))
+                       errors = append(errors, fmt.Sprintf("%s: %s", listenURL, err))
                }
        }
        if len(errors) > 0 {
-               return arvados.URL{}, fmt.Errorf("could not enable the %q service on this host: %s", prog, strings.Join(errors, "; "))
+               return arvados.URL{}, arvados.URL{}, fmt.Errorf("could not enable the %q service on this host: %s", prog, strings.Join(errors, "; "))
        }
-       return arvados.URL{}, fmt.Errorf("configuration does not enable the %q service on this host", prog)
+       return arvados.URL{}, arvados.URL{}, fmt.Errorf("configuration does not enable the %q service on this host", prog)
 }
 
 type contextKeyURL struct{}
index 10591d9b55cf44beb41e7a898a296f20a0aab851..7db91092745e2e4886f0b1b35a3015da0f0387fc 100644 (file)
@@ -11,7 +11,9 @@ import (
        "crypto/tls"
        "fmt"
        "io/ioutil"
+       "net"
        "net/http"
+       "net/url"
        "os"
        "testing"
        "time"
@@ -35,6 +37,126 @@ const (
        contextKey key = iota
 )
 
+func (*Suite) TestGetListenAddress(c *check.C) {
+       // Find an available port on the testing host, so the test
+       // cases don't get confused by "already in use" errors.
+       listener, err := net.Listen("tcp", ":")
+       c.Assert(err, check.IsNil)
+       _, unusedPort, err := net.SplitHostPort(listener.Addr().String())
+       c.Assert(err, check.IsNil)
+       listener.Close()
+
+       defer os.Unsetenv("ARVADOS_SERVICE_INTERNAL_URL")
+       for idx, trial := range []struct {
+               // internalURL => listenURL, both with trailing "/"
+               // because config loader always adds it
+               internalURLs     map[string]string
+               envVar           string
+               expectErrorMatch string
+               expectLogsMatch  string
+               expectListen     string
+               expectInternal   string
+       }{
+               {
+                       internalURLs:   map[string]string{"http://localhost:" + unusedPort + "/": ""},
+                       expectListen:   "http://localhost:" + unusedPort + "/",
+                       expectInternal: "http://localhost:" + unusedPort + "/",
+               },
+               { // implicit port 80 in InternalURLs
+                       internalURLs:     map[string]string{"http://localhost/": ""},
+                       expectErrorMatch: `.*:80: bind: permission denied`,
+               },
+               { // implicit port 443 in InternalURLs
+                       internalURLs:   map[string]string{"https://host.example/": "http://localhost:" + unusedPort + "/"},
+                       expectListen:   "http://localhost:" + unusedPort + "/",
+                       expectInternal: "https://host.example/",
+               },
+               { // implicit port 443 in ListenURL
+                       internalURLs:     map[string]string{"wss://host.example/": "wss://localhost/"},
+                       expectErrorMatch: `.*:443: bind: permission denied`,
+               },
+               {
+                       internalURLs:   map[string]string{"https://hostname.example/": "http://localhost:8000/"},
+                       expectListen:   "http://localhost:8000/",
+                       expectInternal: "https://hostname.example/",
+               },
+               {
+                       internalURLs: map[string]string{
+                               "https://hostname1.example/": "http://localhost:12435/",
+                               "https://hostname2.example/": "http://localhost:" + unusedPort + "/",
+                       },
+                       envVar:         "https://hostname2.example", // note this works despite missing trailing "/"
+                       expectListen:   "http://localhost:" + unusedPort + "/",
+                       expectInternal: "https://hostname2.example/",
+               },
+               { // cannot listen on any of the ListenURLs
+                       internalURLs: map[string]string{
+                               "https://hostname1.example/": "http://1.2.3.4:" + unusedPort + "/",
+                               "https://hostname2.example/": "http://1.2.3.4:" + unusedPort + "/",
+                       },
+                       expectErrorMatch: "configuration does not enable the \"arvados-controller\" service on this host",
+               },
+               { // cannot listen on any of the (implied) ListenURLs
+                       internalURLs: map[string]string{
+                               "https://1.2.3.4/": "",
+                               "https://1.2.3.5/": "",
+                       },
+                       expectErrorMatch: "configuration does not enable the \"arvados-controller\" service on this host",
+               },
+               { // impossible port number
+                       internalURLs: map[string]string{
+                               "https://host.example/": "http://0.0.0.0:1234567",
+                       },
+                       expectErrorMatch: `.*:1234567: listen tcp: address 1234567: invalid port`,
+               },
+               {
+                       // env var URL not mentioned in config = obey env var, with warning
+                       internalURLs:    map[string]string{"https://hostname1.example/": "http://localhost:8000/"},
+                       envVar:          "https://hostname2.example",
+                       expectListen:    "https://hostname2.example/",
+                       expectInternal:  "https://hostname2.example/",
+                       expectLogsMatch: `.*\Qpossible configuration error: listening on https://hostname2.example/ (from $ARVADOS_SERVICE_INTERNAL_URL) even though configuration does not have a matching InternalURLs entry\E.*\n`,
+               },
+               {
+                       // env var + empty config = obey env var, with warning
+                       envVar:          "https://hostname.example",
+                       expectListen:    "https://hostname.example/",
+                       expectInternal:  "https://hostname.example/",
+                       expectLogsMatch: `.*\Qpossible configuration error: listening on https://hostname.example/ (from $ARVADOS_SERVICE_INTERNAL_URL) even though configuration does not have a matching InternalURLs entry\E.*\n`,
+               },
+       } {
+               c.Logf("trial %d %+v", idx, trial)
+               os.Setenv("ARVADOS_SERVICE_INTERNAL_URL", trial.envVar)
+               var logbuf bytes.Buffer
+               log := ctxlog.New(&logbuf, "text", "info")
+               services := arvados.Services{Controller: arvados.Service{InternalURLs: map[arvados.URL]arvados.ServiceInstance{}}}
+               for k, v := range trial.internalURLs {
+                       u, err := url.Parse(k)
+                       c.Assert(err, check.IsNil)
+                       si := arvados.ServiceInstance{}
+                       if v != "" {
+                               u, err := url.Parse(v)
+                               c.Assert(err, check.IsNil)
+                               si.ListenURL = arvados.URL(*u)
+                       }
+                       services.Controller.InternalURLs[arvados.URL(*u)] = si
+               }
+               listenURL, internalURL, err := getListenAddr(services, "arvados-controller", log)
+               if trial.expectLogsMatch != "" {
+                       c.Check(logbuf.String(), check.Matches, trial.expectLogsMatch)
+               }
+               if trial.expectErrorMatch != "" {
+                       c.Check(err, check.ErrorMatches, trial.expectErrorMatch)
+                       continue
+               }
+               if !c.Check(err, check.IsNil) {
+                       continue
+               }
+               c.Check(listenURL.String(), check.Equals, trial.expectListen)
+               c.Check(internalURL.String(), check.Equals, trial.expectInternal)
+       }
+}
+
 func (*Suite) TestCommand(c *check.C) {
        cf, err := ioutil.TempFile("", "cmd_test.")
        c.Assert(err, check.IsNil)
index 0d8f293124976cb42f9d009da1b16c5057ba1071..c90551a6109af9dc9afbdd33bed9c78f5f7bc5ed 100644 (file)
@@ -401,6 +401,7 @@ func (su URL) String() string {
 }
 
 type ServiceInstance struct {
+       ListenURL  URL
        Rendezvous string `json:",omitempty"`
 }
 
index ce9253ab3d4f5d5447273cfe02edca716afc52fd..2ad4d1f859f1141035c04cb4180c5ef623d1fa04 100644 (file)
@@ -234,6 +234,14 @@ type fileinfo struct {
        mode    os.FileMode
        size    int64
        modTime time.Time
+       // If not nil, sys() returns the source data structure, which
+       // can be a *Collection, *Group, or nil. Currently populated
+       // only for project dirs and top-level collection dirs. Does
+       // not stay up to date with upstream changes.
+       //
+       // Intended to support keep-web's properties-as-s3-metadata
+       // feature (https://dev.arvados.org/issues/19088).
+       sys func() interface{}
 }
 
 // Name implements os.FileInfo.
@@ -261,9 +269,12 @@ func (fi fileinfo) Size() int64 {
        return fi.size
 }
 
-// Sys implements os.FileInfo.
+// Sys implements os.FileInfo. See comment in fileinfo struct.
 func (fi fileinfo) Sys() interface{} {
-       return nil
+       if fi.sys == nil {
+               return nil
+       }
+       return fi.sys()
 }
 
 type nullnode struct{}
index ccfbdc4da262c13ee3d319ad072f73a10b9b1d0a..26012e240603d0be43a1019346c4e946e2821790 100644 (file)
@@ -85,6 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
                                name:    ".",
                                mode:    os.ModeDir | 0755,
                                modTime: modTime,
+                               sys:     func() interface{} { return c },
                        },
                        inodes: make(map[string]inode),
                },
index 66a126a39c12a45620a93c59a9047ac3d4ae1fe8..1dfa2df6e4005f0b6d93f497a657e81e583bad14 100644 (file)
@@ -24,6 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
                        name:    coll.Name,
                        modTime: modTime,
                        mode:    0755 | os.ModeDir,
+                       sys:     func() interface{} { return &coll },
                },
        }
        return &deferrednode{wrapped: placeholder, create: func() inode {
index 380fb9c6d5f2f8dac636b06c90df14973c0adb36..bea1f76e24f24faffa38fbccd7b6b880ffb2d22e 100644 (file)
@@ -38,6 +38,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
                                {"uuid", "is_a", []string{"arvados#collection", "arvados#group"}},
                                {"groups.group_class", "=", "project"},
                        },
+                       Select: []string{"uuid", "name", "modified_at", "properties"},
                })
                if err != nil {
                        return nil, err
@@ -63,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
        if strings.Contains(coll.UUID, "-j7d0g-") {
                // Group item was loaded into a Collection var -- but
                // we only need the Name and UUID anyway, so it's OK.
-               return fs.newProjectNode(parent, coll.Name, coll.UUID), nil
+               return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil
        } else if strings.Contains(coll.UUID, "-4zz18-") {
                return deferredCollectionFS(fs, parent, coll), nil
        } else {
@@ -98,6 +99,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                        Count:   "none",
                        Filters: filters,
                        Order:   "uuid",
+                       Select:  []string{"uuid", "name", "modified_at", "properties"},
                }
 
                for {
@@ -121,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                                        continue
                                }
                                if strings.Contains(i.UUID, "-j7d0g-") {
-                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID))
+                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{
+                                               UUID:       i.UUID,
+                                               Name:       i.Name,
+                                               ModifiedAt: i.ModifiedAt,
+                                               Properties: i.Properties,
+                                       }))
                                } else if strings.Contains(i.UUID, "-4zz18-") {
                                        inodes = append(inodes, deferredCollectionFS(fs, parent, i))
                                } else {
index 3892be1e9a97610522a1fc219d5c0fb807788e4c..bb2eee77925fd2c682c7d42e1e8e175c4f2f1489 100644 (file)
@@ -77,7 +77,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) {
        fs.root.treenode.Lock()
        defer fs.root.treenode.Unlock()
        fs.root.treenode.Child(mount, func(inode) (inode, error) {
-               return fs.newProjectNode(fs.root, mount, uuid), nil
+               return fs.newProjectNode(fs.root, mount, uuid, nil), nil
        })
 }
 
@@ -140,7 +140,7 @@ func (fs *customFileSystem) mountByID(parent inode, id string) inode {
        if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) {
                return fs.mountCollection(parent, id)
        } else if strings.Contains(id, "-j7d0g-") {
-               return fs.newProjectNode(fs.root, id, id)
+               return fs.newProjectNode(fs.root, id, id, nil)
        } else {
                return nil
        }
@@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
        return cfs
 }
 
-func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode {
+func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode {
+       var projLoading sync.Mutex
        return &lookupnode{
                stale:   fs.Stale,
                loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
@@ -174,6 +175,20 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
                                name:    name,
                                modTime: time.Now(),
                                mode:    0755 | os.ModeDir,
+                               sys: func() interface{} {
+                                       projLoading.Lock()
+                                       defer projLoading.Unlock()
+                                       if proj != nil {
+                                               return proj
+                                       }
+                                       var g Group
+                                       err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil)
+                                       if err != nil {
+                                               return err
+                                       }
+                                       proj = &g
+                                       return proj
+                               },
                        },
                },
        }
index 00f70369694430f70f0cca185270ceb905e34c01..ae47414b7abe80b9c0e2a2ff0a5e7c36d7320dd2 100644 (file)
@@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro
                return nil, os.ErrNotExist
        }
        user := resp.Items[0]
-       return fs.newProjectNode(parent, user.Username, user.UUID), nil
+       return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil
 }
 
 func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
@@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
                        if user.Username == "" {
                                continue
                        }
-                       inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID))
+                       inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil))
                }
                params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}}
        }
diff --git a/sdk/go/httpserver/inspect.go b/sdk/go/httpserver/inspect.go
new file mode 100644 (file)
index 0000000..cb08acf
--- /dev/null
@@ -0,0 +1,133 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package httpserver
+
+import (
+       "encoding/json"
+       "net/http"
+       "sort"
+       "sync"
+       "sync/atomic"
+       "time"
+
+       "github.com/prometheus/client_golang/prometheus"
+)
+
+// Inspect serves a report of current requests at "GET
+// /_inspect/requests", and passes other requests through to the next
+// handler.
+//
+// If registry is not nil, Inspect registers metrics about current
+// requests.
+func Inspect(registry *prometheus.Registry, authToken string, next http.Handler) http.Handler {
+       type ent struct {
+               startTime  time.Time
+               hangupTime atomic.Value
+       }
+       current := map[*http.Request]*ent{}
+       mtx := sync.Mutex{}
+       if registry != nil {
+               registry.MustRegister(prometheus.NewGaugeFunc(
+                       prometheus.GaugeOpts{
+                               Namespace: "arvados",
+                               Name:      "max_active_request_age_seconds",
+                               Help:      "Age of oldest active request",
+                       },
+                       func() float64 {
+                               mtx.Lock()
+                               defer mtx.Unlock()
+                               earliest := time.Time{}
+                               any := false
+                               for _, e := range current {
+                                       if _, ok := e.hangupTime.Load().(time.Time); ok {
+                                               // Don't count abandoned requests here
+                                               continue
+                                       }
+                                       if !any || e.startTime.Before(earliest) {
+                                               any = true
+                                               earliest = e.startTime
+                                       }
+                               }
+                               if !any {
+                                       return 0
+                               }
+                               return float64(time.Since(earliest).Seconds())
+                       },
+               ))
+               registry.MustRegister(prometheus.NewGaugeFunc(
+                       prometheus.GaugeOpts{
+                               Namespace: "arvados",
+                               Name:      "max_abandoned_request_age_seconds",
+                               Help:      "Maximum time since client hung up on a request whose processing thread is still running",
+                       },
+                       func() float64 {
+                               mtx.Lock()
+                               defer mtx.Unlock()
+                               earliest := time.Time{}
+                               any := false
+                               for _, e := range current {
+                                       if hangupTime, ok := e.hangupTime.Load().(time.Time); ok {
+                                               if !any || hangupTime.Before(earliest) {
+                                                       any = true
+                                                       earliest = hangupTime
+                                               }
+                                       }
+                               }
+                               if !any {
+                                       return 0
+                               }
+                               return float64(time.Since(earliest).Seconds())
+                       },
+               ))
+       }
+       return http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
+               if req.Method == "GET" && req.URL.Path == "/_inspect/requests" {
+                       if authToken == "" || req.Header.Get("Authorization") != "Bearer "+authToken {
+                               Error(w, "unauthorized", http.StatusUnauthorized)
+                               return
+                       }
+                       mtx.Lock()
+                       defer mtx.Unlock()
+                       type outrec struct {
+                               RequestID  string
+                               Method     string
+                               Host       string
+                               URL        string
+                               RemoteAddr string
+                               Elapsed    float64
+                       }
+                       now := time.Now()
+                       outrecs := []outrec{}
+                       for req, e := range current {
+                               outrecs = append(outrecs, outrec{
+                                       RequestID:  req.Header.Get(HeaderRequestID),
+                                       Method:     req.Method,
+                                       Host:       req.Host,
+                                       URL:        req.URL.String(),
+                                       RemoteAddr: req.RemoteAddr,
+                                       Elapsed:    now.Sub(e.startTime).Seconds(),
+                               })
+                       }
+                       sort.Slice(outrecs, func(i, j int) bool { return outrecs[i].Elapsed < outrecs[j].Elapsed })
+                       w.Header().Set("Content-Type", "application/json")
+                       json.NewEncoder(w).Encode(outrecs)
+               } else {
+                       e := ent{startTime: time.Now()}
+                       mtx.Lock()
+                       current[req] = &e
+                       mtx.Unlock()
+                       go func() {
+                               <-req.Context().Done()
+                               e.hangupTime.Store(time.Now())
+                       }()
+                       defer func() {
+                               mtx.Lock()
+                               defer mtx.Unlock()
+                               delete(current, req)
+                       }()
+                       next.ServeHTTP(w, req)
+               }
+       })
+}
diff --git a/sdk/go/httpserver/inspect_test.go b/sdk/go/httpserver/inspect_test.go
new file mode 100644 (file)
index 0000000..624cedb
--- /dev/null
@@ -0,0 +1,98 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package httpserver
+
+import (
+       "context"
+       "encoding/json"
+       "net/http"
+       "net/http/httptest"
+       "strings"
+       "time"
+
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/prometheus/client_golang/prometheus/promhttp"
+       check "gopkg.in/check.v1"
+)
+
+func (s *Suite) TestInspect(c *check.C) {
+       reg := prometheus.NewRegistry()
+       h := newTestHandler()
+       mh := Inspect(reg, "abcd", h)
+       handlerReturned := make(chan struct{})
+       reqctx, reqcancel := context.WithCancel(context.Background())
+       longreq := httptest.NewRequest("GET", "/test", nil).WithContext(reqctx)
+       go func() {
+               mh.ServeHTTP(httptest.NewRecorder(), longreq)
+               close(handlerReturned)
+       }()
+       <-h.inHandler
+
+       resp := httptest.NewRecorder()
+       req := httptest.NewRequest("GET", "/_inspect/requests", nil)
+       mh.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
+       c.Check(resp.Body.String(), check.Equals, `{"errors":["unauthorized"]}`+"\n")
+
+       resp = httptest.NewRecorder()
+       req.Header.Set("Authorization", "Bearer abcde")
+       mh.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
+
+       resp = httptest.NewRecorder()
+       req.Header.Set("Authorization", "Bearer abcd")
+       mh.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       reqs := []map[string]interface{}{}
+       err := json.NewDecoder(resp.Body).Decode(&reqs)
+       c.Check(err, check.IsNil)
+       c.Check(reqs, check.HasLen, 1)
+       c.Check(reqs[0]["URL"], check.Equals, "/test")
+
+       // Request is active, so we should see active request age > 0
+       resp = httptest.NewRecorder()
+       mreq := httptest.NewRequest("GET", "/metrics", nil)
+       promhttp.HandlerFor(reg, promhttp.HandlerOpts{}).ServeHTTP(resp, mreq)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*\narvados_max_active_request_age_seconds [0\.]*[1-9][-\d\.e]*\n.*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*\narvados_max_abandoned_request_age_seconds 0\n.*`)
+
+       reqcancel()
+
+       // Request context is canceled but handler hasn't returned, so
+       // we should see max abandoned request age > 0 and active ==
+       // 0. We might need to wait a short time for the cancel to
+       // propagate.
+       for deadline := time.Now().Add(time.Second); time.Now().Before(deadline); time.Sleep(time.Second / 100) {
+               resp = httptest.NewRecorder()
+               promhttp.HandlerFor(reg, promhttp.HandlerOpts{}).ServeHTTP(resp, mreq)
+               c.Assert(resp.Code, check.Equals, http.StatusOK)
+               if strings.Contains(resp.Body.String(), "\narvados_max_active_request_age_seconds 0\n") {
+                       break
+               }
+       }
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*\narvados_max_active_request_age_seconds 0\n.*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*\narvados_max_abandoned_request_age_seconds [0\.]*[1-9][-\d\.e]*\n.*`)
+
+       h.okToProceed <- struct{}{}
+       <-handlerReturned
+
+       // Handler has returned, so we should see max abandoned
+       // request age == max active request age == 0
+       resp = httptest.NewRecorder()
+       promhttp.HandlerFor(reg, promhttp.HandlerOpts{}).ServeHTTP(resp, mreq)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*\narvados_max_active_request_age_seconds 0\n.*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*\narvados_max_abandoned_request_age_seconds 0\n.*`)
+
+       // ...and no active requests at the /_monitor endpoint
+       resp = httptest.NewRecorder()
+       mh.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       reqs = nil
+       err = json.NewDecoder(resp.Body).Decode(&reqs)
+       c.Check(err, check.IsNil)
+       c.Assert(reqs, check.HasLen, 0)
+}
index 5a46635e9102365bbfd01c9c9c120bd8e23a7026..b71adf71181a9eb6b550093f73dbe4ab884038ce 100644 (file)
@@ -47,7 +47,13 @@ func (hn hijackNotifier) Hijack() (net.Conn, *bufio.ReadWriter, error) {
 // HandlerWithDeadline cancels the request context if the request
 // takes longer than the specified timeout without having its
 // connection hijacked.
+//
+// If timeout is 0, there is no deadline: HandlerWithDeadline is a
+// no-op.
 func HandlerWithDeadline(timeout time.Duration, next http.Handler) http.Handler {
+       if timeout == 0 {
+               return next
+       }
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
                ctx, cancel := context.WithCancel(r.Context())
                defer cancel()
index 64d1f3d4cfb3fc47930ad1d655ae97366af6efb0..9258fbfa58f4b5a4867651fc15aba4e9b9616dcf 100644 (file)
@@ -22,7 +22,7 @@ func (h *testHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        <-h.okToProceed
 }
 
-func newTestHandler(maxReqs int) *testHandler {
+func newTestHandler() *testHandler {
        return &testHandler{
                inHandler:   make(chan struct{}),
                okToProceed: make(chan struct{}),
@@ -30,7 +30,7 @@ func newTestHandler(maxReqs int) *testHandler {
 }
 
 func TestRequestLimiter1(t *testing.T) {
-       h := newTestHandler(10)
+       h := newTestHandler()
        l := NewRequestLimiter(1, h, nil)
        var wg sync.WaitGroup
        resps := make([]*httptest.ResponseRecorder, 10)
@@ -90,7 +90,7 @@ func TestRequestLimiter1(t *testing.T) {
 }
 
 func TestRequestLimiter10(t *testing.T) {
-       h := newTestHandler(10)
+       h := newTestHandler()
        l := NewRequestLimiter(10, h, nil)
        var wg sync.WaitGroup
        for i := 0; i < 10; i++ {
index 5c4cf7bc16c22ad8d8780714d9b0165cf2c4043b..69453959d262a792b7f09edca6b6557e8a5d8a4b 100644 (file)
@@ -6,6 +6,8 @@ class DatabaseController < ApplicationController
   skip_before_action :find_object_by_uuid
   skip_before_action :render_404_if_no_object
   before_action :admin_required
+  around_action :silence_logs, only: [:reset]
+
   def reset
     raise ArvadosModel::PermissionDeniedError unless Rails.env == 'test'
 
@@ -23,7 +25,7 @@ class DatabaseController < ApplicationController
     unexpected_uuids = user_uuids - fixture_uuids
     if unexpected_uuids.any?
       logger.error("Running in test environment, but non-fixture users exist: " +
-                   "#{unexpected_uuids}")
+                   "#{unexpected_uuids}" + "\nMaybe test users without @example.com email addresses were created?")
       raise ArvadosModel::PermissionDeniedError
     end
 
@@ -83,4 +85,17 @@ class DatabaseController < ApplicationController
     # Done.
     send_json success: true
   end
+
+  protected
+
+  def silence_logs
+    Rails.logger.info("(logging level temporarily raised to :error, see #{__FILE__})")
+    orig = ActiveRecord::Base.logger.level
+    ActiveRecord::Base.logger.level = :error
+    begin
+      yield
+    ensure
+      ActiveRecord::Base.logger.level = orig
+    end
+  end
 end
index 14630d9efa85615a09585082299290b71def8530..1d9bcbb040ab7c5dd955361704da7b9a81a3147b 100644 (file)
@@ -12,6 +12,7 @@ system_user:
   modified_by_user_uuid: zzzzz-tpzed-000000000000000
   modified_at: 2014-11-27 06:38:21.208036000 Z
   email: root
+  username: root
   first_name: root
   last_name: ''
   identity_url:
@@ -171,7 +172,7 @@ spectator:
 container_runtime_token_user:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-l3skomkti0c4vg4
-  email: spectator@arvados.local
+  email: container_runtime_token_user@arvados.local
   first_name: Spect
   last_name: Ator
   identity_url: https://container_runtime_token_user.openid.local
@@ -193,6 +194,7 @@ inactive_uninvited:
   identity_url: https://inactive-uninvited-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveuninvited
   prefs: {}
 
 inactive:
@@ -216,6 +218,7 @@ inactive_but_signed_user_agreement:
   identity_url: https://inactive-but-agreeable-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveusersignedua
   prefs:
     profile:
       organization: example.com
@@ -230,6 +233,7 @@ anonymous:
   last_name: anonymouspublic
   is_active: false
   is_admin: false
+  username: anonymous
   prefs: {}
 
 job_reader:
@@ -273,17 +277,19 @@ active_no_prefs:
   identity_url: https://active_no_prefs.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefs
   prefs: {}
 
 active_no_prefs_profile_no_getting_started_shown:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-a46c98d1td4aoj4
-  email: active_no_prefs_profile@arvados.local
+  email: active_no_prefs_profile_no_gs@arvados.local
   first_name: HasPrefs
   last_name: NoProfile
   identity_url: https://active_no_prefs_profile.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefsprofilenogs
   prefs:
     test: abc
 
@@ -296,6 +302,7 @@ active_no_prefs_profile_with_getting_started_shown:
   identity_url: https://active_no_prefs_profile_seen_gs.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefsprofile
   prefs:
     test: abc
     getting_started_shown: 2015-03-26 12:34:56.789000000 Z
@@ -308,6 +315,7 @@ active_with_prefs_profile_no_getting_started_shown:
   last_name: NoGettingStartedShown
   identity_url: https://active_nogettinstarted.openid.local
   is_active: true
+  username: activenogettinstarted
   prefs:
     profile:
       organization: example.com
@@ -372,7 +380,7 @@ fuse:
 permission_perftest:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-permissionptest
-  email: fuse@arvados.local
+  email: permission_perftest@arvados.local
   first_name: FUSE
   last_name: User
   identity_url: https://permission_perftest.openid.local
@@ -431,4 +439,4 @@ has_can_login_permission:
   is_active: true
   is_admin: false
   modified_at: 2015-03-26 12:34:56.789000000 Z
-  username: can-login-user
+  username: canLoginUser
index 59ab3cd4389c0cc57f9a923983ee58e12bca5184..90b75f8a306019c2b646d15228da0c1c54a62956 100644 (file)
@@ -8,12 +8,15 @@ import (
        "crypto/hmac"
        "crypto/sha256"
        "encoding/base64"
+       "encoding/json"
        "encoding/xml"
        "errors"
        "fmt"
        "hash"
        "io"
+       "mime"
        "net/http"
+       "net/textproto"
        "net/url"
        "os"
        "path/filepath"
@@ -385,6 +388,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
                        if err == nil && fi.IsDir() {
+                               err = setFileInfoHeaders(w.Header(), fs, fspath)
+                               if err != nil {
+                                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                                       return true
+                               }
                                w.WriteHeader(http.StatusOK)
                        } else if os.IsNotExist(err) {
                                s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -394,6 +402,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        return true
                }
                if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
+                       err = setFileInfoHeaders(w.Header(), fs, fspath)
+                       if err != nil {
+                               s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                               return true
+                       }
                        w.Header().Set("Content-Type", "application/x-directory")
                        w.WriteHeader(http.StatusOK)
                        return true
@@ -415,6 +428,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                // shallow copy r, and change URL path
                r := *r
                r.URL.Path = fspath
+               err = setFileInfoHeaders(w.Header(), fs, fspath)
+               if err != nil {
+                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                       return true
+               }
                http.FileServer(fs).ServeHTTP(w, &r)
                return true
        case r.Method == http.MethodPut:
@@ -586,6 +604,60 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        }
 }
 
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
+       maybeEncode := func(s string) string {
+               for _, c := range s {
+                       if c > '\u007f' {
+                               return mime.BEncoding.Encode("UTF-8", s)
+                       }
+               }
+               return s
+       }
+       path = strings.TrimSuffix(path, "/")
+       var props map[string]interface{}
+       for {
+               fi, err := fs.Stat(path)
+               if err != nil {
+                       return err
+               }
+               switch src := fi.Sys().(type) {
+               case *arvados.Collection:
+                       props = src.Properties
+               case *arvados.Group:
+                       props = src.Properties
+               default:
+                       if err, ok := src.(error); ok {
+                               return err
+                       }
+                       // Try parent
+                       cut := strings.LastIndexByte(path, '/')
+                       if cut < 0 {
+                               return nil
+                       }
+                       path = path[:cut]
+                       continue
+               }
+               break
+       }
+       for k, v := range props {
+               if !validMIMEHeaderKey(k) {
+                       continue
+               }
+               k = "x-amz-meta-" + k
+               if s, ok := v.(string); ok {
+                       header.Set(k, maybeEncode(s))
+               } else if j, err := json.Marshal(v); err == nil {
+                       header.Set(k, maybeEncode(string(j)))
+               }
+       }
+       return nil
+}
+
+func validMIMEHeaderKey(k string) bool {
+       check := "z-" + k
+       return check != textproto.CanonicalMIMEHeaderKey(check)
+}
+
 // Call fn on the given path (directory) and its contents, in
 // lexicographic order.
 //
index 261ebb5741388a87a618d7168936e4292052a6c3..a99f3c278f6214b5764f853920c10539f7757ffe 100644 (file)
@@ -11,6 +11,7 @@ import (
        "crypto/sha256"
        "fmt"
        "io/ioutil"
+       "mime"
        "net/http"
        "net/http/httptest"
        "net/url"
@@ -39,12 +40,13 @@ type s3stage struct {
        kc         *keepclient.KeepClient
        proj       arvados.Group
        projbucket *s3.Bucket
+       subproj    arvados.Group
        coll       arvados.Collection
        collbucket *s3.Bucket
 }
 
 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
-       var proj arvados.Group
+       var proj, subproj arvados.Group
        var coll arvados.Collection
        arv := arvados.NewClientFromEnv()
        arv.AuthToken = arvadostest.ActiveToken
@@ -52,14 +54,35 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                "group": map[string]interface{}{
                        "group_class": "project",
                        "name":        "keep-web s3 test",
+                       "properties": map[string]interface{}{
+                               "project-properties-key": "project properties value",
+                       },
                },
                "ensure_unique_name": true,
        })
        c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+               "group": map[string]interface{}{
+                       "owner_uuid":  proj.UUID,
+                       "group_class": "project",
+                       "name":        "keep-web s3 test subproject",
+                       "properties": map[string]interface{}{
+                               "subproject_properties_key": "subproject properties value",
+                               "invalid header key":        "this value will not be returned because key contains spaces",
+                       },
+               },
+       })
+       c.Assert(err, check.IsNil)
        err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
                "owner_uuid":    proj.UUID,
                "name":          "keep-web s3 test collection",
                "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+               "properties": map[string]interface{}{
+                       "string":   "string value",
+                       "array":    []string{"element1", "element2"},
+                       "object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
+                       "nonascii": "⛵",
+               },
        }})
        c.Assert(err, check.IsNil)
        ac, err := arvadosclient.New(arv)
@@ -95,7 +118,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                        S3:   client,
                        Name: proj.UUID,
                },
-               coll: coll,
+               subproj: subproj,
+               coll:    coll,
                collbucket: &s3.Bucket{
                        S3:   client,
                        Name: coll.UUID,
@@ -215,6 +239,73 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix
        c.Check(exists, check.Equals, true)
 }
 
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
+       got := map[string]string{}
+       for hk, hv := range hdr {
+               if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
+                       got[k] = hv[0]
+               }
+       }
+       c.Check(got, check.DeepEquals, expect)
+}
+
+func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       expectCollectionTags := map[string]string{
+               "String":   "string value",
+               "Array":    `["element1","element2"]`,
+               "Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
+               "Nonascii": "=?UTF-8?b?4pu1?=",
+       }
+       expectSubprojectTags := map[string]string{
+               "Subproject_properties_key": "subproject properties value",
+       }
+       expectProjectTags := map[string]string{
+               "Project-Properties-Key": "project properties value",
+       }
+
+       c.Log("HEAD object with metadata from collection")
+       resp, err := stage.collbucket.Head("sailboat.txt", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("GET object with metadata from collection")
+       rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
+       c.Assert(err, check.IsNil)
+       content, err := ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       rdr.Close()
+       c.Check(content, check.HasLen, 4)
+       s.checkMetaEquals(c, hdr, expectCollectionTags)
+
+       c.Log("HEAD bucket with metadata from collection")
+       resp, err = stage.collbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD directory placeholder with metadata from collection")
+       resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD file with metadata from collection")
+       resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD directory placeholder with metadata from subproject")
+       resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
+
+       c.Log("HEAD bucket with metadata from project")
+       resp, err = stage.projbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectProjectTags)
+}
+
 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
        stage := s.s3setup(c)
        defer stage.teardown(c)
index da8a21efa37c8a8db91b925bc56040f7bff494b8..5c6691ab95279920498875a7e49295c8a2b4a5a4 100755 (executable)
@@ -10,6 +10,7 @@ require 'etc'
 require 'fileutils'
 require 'yaml'
 require 'optparse'
+require 'open3'
 
 req_envs = %w(ARVADOS_API_HOST ARVADOS_API_TOKEN ARVADOS_VIRTUAL_MACHINE_UUID)
 req_envs.each do |k|
@@ -124,11 +125,12 @@ begin
     unless pwnam[l[:username]]
       STDERR.puts "Creating account #{l[:username]}"
       # Create new user
-      unless system("useradd", "-m",
+      out, st = Open3.capture2e("useradd", "-m",
                 "-c", username,
                 "-s", "/bin/bash",
                 username)
-        STDERR.puts "Account creation failed for #{l[:username]}: #{$?}"
+      if st.exitstatus != 0
+        STDERR.puts "Account creation failed for #{l[:username]}:\n#{out}"
         next
       end
       begin
@@ -150,7 +152,10 @@ begin
       if existing_groups.index(addgroup).nil?
         # User should be in group, but isn't, so add them.
         STDERR.puts "Add user #{username} to #{addgroup} group"
-        system("usermod", "-aG", addgroup, username)
+        out, st = Open3.capture2e("usermod", "-aG", addgroup, username)
+        if st.exitstatus != 0
+          STDERR.puts "Failed to add #{username} to #{addgroup} group:\n#{out}"
+        end
       end
     end
 
@@ -158,7 +163,10 @@ begin
       if groups.index(removegroup).nil?
         # User is in a group, but shouldn't be, so remove them.
         STDERR.puts "Remove user #{username} from #{removegroup} group"
-        system("gpasswd", "-d", username, removegroup)
+        out, st = Open3.capture2e("gpasswd", "-d", username, removegroup)
+        if st.exitstatus != 0
+          STDERR.puts "Failed to remove user #{username} from #{removegroup} group:\n#{out}"
+        end
       end
     end
 
index f41b6ac5b36d45fc1e4ff0503c2072890c22a81a..02653082f30cafb75c03d28764d7b9115aab9214 100644 (file)
@@ -93,7 +93,7 @@ arvados:
     resources:
       virtual_machines:
         shell:
-          name: shell
+          name: shell.__CLUSTER__.__DOMAIN__
           backend: __SHELL_INT_IP__
           port: 4200
 
index e06ddd041c9acb4d01a1bab8a3deb8de6253f287..d6320da24651612e760178fa598bdd0fb6353b83 100644 (file)
@@ -19,7 +19,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
index 86c591e97ed3679eb8687fddb19939839e63ad92..9028b9b1001f2f297d170f4d882574d42b875548 100644 (file)
@@ -75,6 +75,13 @@ extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_virtual_machine_uuid_cron_e
     - onlyif:
       - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
 
+extra_shell_cron_add_login_sync_add_{{ vm }}_sbin_to_path_cron_env_present:
+  cron.env_present:
+    - name: PATH
+    - value: "/bin:/usr/bin:/usr/sbin"
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
 extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_login_sync_cron_present:
   cron.present:
     - name: /usr/local/bin/arvados-login-sync
index dfddf3b62361ae66305f178bd3c0a84436a50082..cf087797159077c42334f4c93fe3df54a238906a 100644 (file)
@@ -55,7 +55,7 @@ nginx:
       - add_header: 'Strict-Transport-Security "max-age=63072000" always'
 
       # OCSP stapling
-      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # NOTE! Stapling does not work with self-signed certificates, so disabling for tests
       # - ssl_stapling: 'on'
       # - ssl_stapling_verify: 'on'
 
index f3bc09f65036c7349e8f9f9fa1cd21746c25cdec..edb961ebaaeccca0899d0c2633ca7c0957369805 100644 (file)
@@ -38,7 +38,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
index 379f4765cb0aa88689f31d99bf5c03ea84d5e560..c2d34ea28c1dd2c0551473eac8943973d5183804 100644 (file)
@@ -12,7 +12,7 @@ arvados_test_salt_states_examples_single_host_etc_hosts_host_present:
     - ip: 127.0.1.1
     - names:
       - {{ arvados.cluster.name }}.{{ arvados.cluster.domain }}
-      # FIXME! This just works for our testings.
+      # NOTE! This just works for our testings.
       # Won't work if the cluster name != host name
       {%- for entry in [
           'api',
index 21c1510de8aa36a153d76d2c7bd8ee8ae44d4cd2..26e2baf0446b861fccadfd466da74414c9c77856 100644 (file)
@@ -55,7 +55,7 @@ nginx:
       - add_header: 'Strict-Transport-Security "max-age=63072000" always'
 
       # OCSP stapling
-      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # NOTE! Stapling does not work with self-signed certificates, so disabling for tests
       # - ssl_stapling: 'on'
       # - ssl_stapling_verify: 'on'
 
index a69b88cb173aa4f72e17343997c65d072456b9b9..14452a990541bf47fee379a33345895f6652cbd8 100644 (file)
@@ -40,7 +40,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
index a688f4f8c11535fdcaaac7b33eaaccf5cddd16c9..51308fffa2c75445df9cd41287675a4a8d4aaedd 100644 (file)
@@ -21,7 +21,7 @@ arvados_test_salt_states_examples_single_host_etc_hosts_host_present:
     - ip: 127.0.1.1
     - names:
       - {{ arvados.cluster.name }}.{{ arvados.cluster.domain }}
-      # FIXME! This just works for our testing.
+      # NOTE! This just works for our testing.
       # Won't work if the cluster name != host name
       {%- for entry in [
           'api',
diff --git a/tools/salt-install/installer.sh b/tools/salt-install/installer.sh
new file mode 100755 (executable)
index 0000000..e5ff7be
--- /dev/null
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: CC-BY-SA-3.0
+
+#
+# installer.sh
+#
+# Helps manage the configuration in a git repository, and then deploy
+# nodes by pushing a copy of the git repository to each node and
+# running the provision script to do the actual installation and
+# configuration.
+#
+
+set -eu
+
+# The parameter file
+declare CONFIG_FILE=local.params
+
+# The salt template directory
+declare CONFIG_DIR=local_config_dir
+
+# The 5-character Arvados cluster id
+# This will be populated by loadconfig()
+declare CLUSTER
+
+# The parent domain (not including the cluster id)
+# This will be populated by loadconfig()
+declare DOMAIN
+
+# A bash associative array listing each node and mapping to the roles
+# that should be provisioned on those nodes.
+# This will be populated by loadconfig()
+declare -A NODES
+
+# The ssh user we'll use
+# This will be populated by loadconfig()
+declare DEPLOY_USER
+
+# The git repository that we'll push to on all the nodes
+# This will be populated by loadconfig()
+declare GITTARGET
+
+sync() {
+    local NODE=$1
+    local BRANCH=$2
+
+    # Synchronizes the configuration by creating a git repository on
+    # each node, pushing our branch, and updating the checkout.
+
+    if [[ "$NODE" != localhost ]] ; then
+       if ! ssh $NODE test -d ${GITTARGET}.git ; then
+
+           # Initialize the git repository (1st time case).  We're
+           # actually going to make two repositories here because git
+           # will complain if you try to push to a repository with a
+           # checkout. So we're going to create a "bare" repository
+           # and then clone a regular repository (with a checkout)
+           # from that.
+
+           ssh $NODE git init --bare ${GITTARGET}.git
+           if ! git remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
+               git remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
+           fi
+           git push $NODE $BRANCH
+           ssh $NODE git clone ${GITTARGET}.git ${GITTARGET}
+       fi
+
+       # The update case.
+       #
+       # Push to the bare repository on the remote node, then in the
+       # remote node repository with the checkout, pull the branch
+       # from the bare repository.
+
+       git push $NODE $BRANCH
+       ssh $NODE "git -C ${GITTARGET} checkout ${BRANCH} && git -C ${GITTARGET} pull"
+    fi
+}
+
+deploynode() {
+    local NODE=$1
+    local ROLES=$2
+
+    # Deploy a node.  This runs the provision script on the node, with
+    # the appropriate roles.
+
+    if [[ -z "$ROLES" ]] ; then
+       echo "No roles declared for '$NODE' in ${CONFIG_FILE}"
+       exit 1
+    fi
+
+    if [[ "$NODE" = localhost ]] ; then
+       sudo ./provision.sh --config ${CONFIG_FILE} --roles ${ROLES}
+    else
+       ssh $DEPLOY_USER@$NODE "cd ${GITTARGET} && sudo ./provision.sh --config ${CONFIG_FILE} --roles ${ROLES}"
+    fi
+}
+
+loadconfig() {
+    if [[ ! -s $CONFIG_FILE ]] ; then
+       echo "Must be run from initialized setup dir, maybe you need to 'initialize' first?"
+    fi
+    source ${CONFIG_FILE}
+    GITTARGET=arvados-deploy-config-${CLUSTER}
+}
+
+subcmd="$1"
+if [[ -n "$subcmd" ]] ; then
+    shift
+fi
+case "$subcmd" in
+    initialize)
+       if [[ ! -f provision.sh ]] ; then
+           echo "Must be run from arvados/tools/salt-install"
+           exit
+       fi
+
+       set +u
+       SETUPDIR=$1
+       PARAMS=$2
+       SLS=$3
+       set -u
+
+       err=
+       if [[ -z "$PARAMS" || ! -f local.params.example.$PARAMS ]] ; then
+           echo "Not found: local.params.example.$PARAMS"
+           echo "Expected one of multiple_hosts, single_host_multiple_hostnames, single_host_single_hostname"
+           err=1
+       fi
+
+       if [[ -z "$SLS" || ! -d config_examples/$SLS ]] ; then
+           echo "Not found: config_examples/$SLS"
+           echo "Expected one of multi_host/aws, single_host/multiple_hostnames, single_host/single_hostname"
+           err=1
+       fi
+
+       if [[ -z "$SETUPDIR" || -z "$PARAMS" || -z "$SLS" ]]; then
+           echo "installer.sh <setup dir to initialize> <params template> <config template>"
+           err=1
+       fi
+
+       if [[ -n "$err" ]] ; then
+           exit 1
+       fi
+
+       echo "Initializing $SETUPDIR"
+       git init $SETUPDIR
+       cp -r *.sh tests $SETUPDIR
+
+       cp local.params.example.$PARAMS $SETUPDIR/${CONFIG_FILE}
+       cp -r config_examples/$SLS $SETUPDIR/${CONFIG_DIR}
+
+       cd $SETUPDIR
+       git add *.sh ${CONFIG_FILE} ${CONFIG_DIR} tests
+       git commit -m"initial commit"
+
+       echo "setup directory initialized, now go to $SETUPDIR, edit '${CONFIG_FILE}' and '${CONFIG_DIR}' as needed, then run 'installer.sh deploy'"
+       ;;
+    deploy)
+       set +u
+       NODE=$1
+       set -u
+
+       loadconfig
+
+       if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
+           echo
+           echo "Some parameters still need to be updated.  Please fix them and then re-run deploy."
+           exit 1
+       fi
+
+       BRANCH=$(git branch --show-current)
+
+       set -x
+
+       git add -A
+       if ! git diff --cached --exit-code ; then
+           git commit -m"prepare for deploy"
+       fi
+
+       if [[ -z "$NODE" ]]; then
+           for NODE in "${!NODES[@]}"
+           do
+               # First, push the git repo to each node.  This also
+               # confirms that we have git and can log into each
+               # node.
+               sync $NODE $BRANCH
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # Do 'database' role first,
+               if [[ "${NODES[$NODE]}" =~ database ]] ; then
+                   deploynode $NODE ${NODES[$NODE]}
+                   unset NODES[$NODE]
+               fi
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # then  'api' or 'controller' roles
+               if [[ "${NODES[$NODE]}" =~ (api|controller) ]] ; then
+                   deploynode $NODE ${NODES[$NODE]}
+                   unset NODES[$NODE]
+               fi
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # Everything else (we removed the nodes that we
+               # already deployed from the list)
+               deploynode $NODE ${NODES[$NODE]}
+           done
+       else
+           # Just deploy the node that was supplied on the command line.
+           sync $NODE $BRANCH
+           deploynode $NODE
+       fi
+
+       echo
+       echo "Completed deploy, run 'installer.sh diagnostics' to verify the install"
+
+       ;;
+    diagnostics)
+       loadconfig
+
+       set +u
+       declare LOCATION=$1
+       set -u
+
+       if ! which arvados-client ; then
+           echo "arvados-client not found, install 'arvados-client' package with 'apt-get' or 'yum'"
+           exit 1
+       fi
+
+       if [[ -z "$LOCATION" ]] ; then
+           echo "Need to provide '-internal-client' or '-external-client'"
+           echo
+           echo "-internal-client    You are running this on the same private network as the Arvados cluster (e.g. on one of the Arvados nodes)"
+           echo "-external-client    You are running this outside the private network of the Arvados cluster (e.g. your workstation)"
+           exit 1
+       fi
+
+       export ARVADOS_API_HOST="${CLUSTER}.${DOMAIN}"
+       export ARVADOS_API_TOKEN="$SYSTEM_ROOT_TOKEN"
+
+       arvados-client diagnostics $LOCATION
+       ;;
+    *)
+       echo "Arvados installer"
+       echo ""
+       echo "initialize   initialize the setup directory for configuration"
+       echo "deploy       deploy the configuration from the setup directory"
+       echo "diagnostics  check your install using diagnostics"
+       ;;
+esac
index 31a69e9840cdfabbee21609b662816b7df60c362..ade1ad46715fd0440b703aa63b5379ff4cf73ce1 100644 (file)
@@ -8,9 +8,26 @@
 # The Arvados cluster ID, needs to be 5 lowercase alphanumeric characters.
 CLUSTER="cluster_fixme_or_this_wont_work"
 
-# The domainname you want tou give to your cluster's hosts
+# The domain name you want to give to your cluster's hosts
+# the end result hostnames will be $SERVICE.$CLUSTER.$DOMAIN
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh log in for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [controller.${CLUSTER}.${DOMAIN}]=api,controller,websocket,dispatcher,keepbalance
+  [keep0.${CLUSTER}.${DOMAIN}]=keepstore
+  [keep1.${CLUSTER}.${DOMAIN}]=keepstore
+  [keep.${CLUSTER}.${DOMAIN}]=keepproxy,keepweb
+  [workbench.${CLUSTER}.${DOMAIN}]=workbench,workbench2,webshell
+  [shell.${CLUSTER}.${DOMAIN}]=shell
+)
+
 # Host SSL port where you want to point your browser to access Arvados
 # Defaults to 443 for regular runs, and to 8443 when called in Vagrant.
 # You can point it to another port if desired
index 2ce1556511bc7d57ddc5a58f53b5840de7353abf..20f334166e419ee806b608ac37fd3a27b10dca82 100644 (file)
@@ -11,6 +11,17 @@ CLUSTER="cluster_fixme_or_this_wont_work"
 # The domainname you want tou give to your cluster's hosts
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh log in for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+)
+
 # External ports used by the Arvados services
 CONTROLLER_EXT_SSL_PORT=443
 KEEP_EXT_SSL_PORT=25101
index 7add9868d9223f90c53d3ef209aa57d875a7328c..a68450094161accb43ef472def621e15b20b2d79 100644 (file)
@@ -11,6 +11,17 @@ CLUSTER="cluster_fixme_or_this_wont_work"
 # The domainname for your cluster's hosts
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh log in for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+)
+
 # Set this value when installing a cluster in a single host with a single
 # hostname to access all the instances. HOSTNAME_EXT should be set to the
 # external hostname for the instance.
index 3c5fb41e0ffc4cf02469e8ffb6d597aca419ea45..f4660be370990302cadbb9b3e11d8f2a44f5de10 100755 (executable)
@@ -237,6 +237,8 @@ T_DIR="/tmp/cluster_tests"
 
 arguments ${@}
 
+declare -A NODES
+
 if [ -s ${CONFIG_FILE} ]; then
   source ${CONFIG_FILE}
 else
@@ -255,7 +257,7 @@ if [ ! -d ${CONFIG_DIR} ]; then
   exit 1
 fi
 
-if grep -q 'fixme_or_this_wont_work' ${CONFIG_FILE} ; then
+if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
   echo >&2 "The config file ${CONFIG_FILE} has some parameters that need to be modified."
   echo >&2 "Please, fix them and re-run the provision script."
   exit 1
diff --git a/tools/sync-users/.gitignore b/tools/sync-users/.gitignore
new file mode 100644 (file)
index 0000000..cbbc176
--- /dev/null
@@ -0,0 +1 @@
+sync-users
\ No newline at end of file
diff --git a/tools/sync-users/sync-users.go b/tools/sync-users/sync-users.go
new file mode 100644 (file)
index 0000000..37b94a9
--- /dev/null
@@ -0,0 +1,544 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "bytes"
+       "encoding/csv"
+       "encoding/json"
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "net/url"
+       "os"
+       "regexp"
+       "strconv"
+       "strings"
+
+       "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+var version = "dev"
+
+type resourceList interface {
+       Len() int
+       GetItems() []interface{}
+}
+
+// UserList implements resourceList interface
+type UserList struct {
+       arvados.UserList
+}
+
+// Len returns the amount of items this list holds
+func (l UserList) Len() int {
+       return len(l.Items)
+}
+
+// GetItems returns the list of items
+func (l UserList) GetItems() (out []interface{}) {
+       for _, item := range l.Items {
+               out = append(out, item)
+       }
+       return
+}
+
+func main() {
+       cfg, err := GetConfig()
+       if err != nil {
+               log.Fatalf("%v", err)
+       }
+
+       if err := doMain(&cfg); err != nil {
+               log.Fatalf("%v", err)
+       }
+}
+
+type ConfigParams struct {
+       CaseInsensitive    bool
+       Client             *arvados.Client
+       ClusterID          string
+       CurrentUser        arvados.User
+       DeactivateUnlisted bool
+       Path               string
+       UserID             string
+       SysUserUUID        string
+       AnonUserUUID       string
+       Verbose            bool
+}
+
+func ParseFlags(cfg *ConfigParams) error {
+       // Acceptable attributes to identify a user on the CSV file
+       userIDOpts := map[string]bool{
+               "email":    true, // default
+               "username": true,
+       }
+
+       flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+       flags.Usage = func() {
+               usageStr := `Synchronize remote users into Arvados from a CSV format file with 5 columns:
+  * 1st: User Identifier (email or username)
+  * 2nd: First name
+  * 3rd: Last name
+  * 4th: Active status (0 or 1)
+  * 5th: Admin status (0 or 1)`
+               fmt.Fprintf(flags.Output(), "%s\n\n", usageStr)
+               fmt.Fprintf(flags.Output(), "Usage:\n%s [OPTIONS] <input-file.csv>\n\n", os.Args[0])
+               fmt.Fprintf(flags.Output(), "Options:\n")
+               flags.PrintDefaults()
+       }
+
+       caseInsensitive := flags.Bool(
+               "case-insensitive",
+               false,
+               "Performs case insensitive matching on user IDs. Always ON when using 'email' user IDs.")
+       deactivateUnlisted := flags.Bool(
+               "deactivate-unlisted",
+               false,
+               "Deactivate users that are not in the input file.")
+       userID := flags.String(
+               "user-id",
+               "email",
+               "Attribute by which every user is identified. Valid values are: email and username.")
+       verbose := flags.Bool(
+               "verbose",
+               false,
+               "Log informational messages.")
+       getVersion := flags.Bool(
+               "version",
+               false,
+               "Print version information and exit.")
+
+       if ok, code := cmd.ParseFlags(flags, os.Args[0], os.Args[1:], "input-file.csv", os.Stderr); !ok {
+               os.Exit(code)
+       } else if *getVersion {
+               fmt.Printf("%s %s\n", os.Args[0], version)
+               os.Exit(0)
+       }
+
+       // Input file as a required positional argument
+       if flags.NArg() == 0 {
+               return fmt.Errorf("please provide a path to an input file")
+       }
+       srcPath := &os.Args[flags.NFlag()+1]
+
+       // Validations
+       if *srcPath == "" {
+               return fmt.Errorf("input file path invalid")
+       }
+       if !userIDOpts[*userID] {
+               var options []string
+               for opt := range userIDOpts {
+                       options = append(options, opt)
+               }
+               return fmt.Errorf("user ID must be one of: %s", strings.Join(options, ", "))
+       }
+       if *userID == "email" {
+               // Always do case-insensitive email addresses matching
+               *caseInsensitive = true
+       }
+
+       cfg.CaseInsensitive = *caseInsensitive
+       cfg.DeactivateUnlisted = *deactivateUnlisted
+       cfg.Path = *srcPath
+       cfg.UserID = *userID
+       cfg.Verbose = *verbose
+
+       return nil
+}
+
+// GetConfig sets up a ConfigParams struct
+func GetConfig() (cfg ConfigParams, err error) {
+       err = ParseFlags(&cfg)
+       if err != nil {
+               return
+       }
+
+       cfg.Client = arvados.NewClientFromEnv()
+
+       // Check current user permissions
+       u, err := cfg.Client.CurrentUser()
+       if err != nil {
+               return cfg, fmt.Errorf("error getting the current user: %s", err)
+       }
+       if !u.IsAdmin {
+               return cfg, fmt.Errorf("current user %q is not an admin user", u.UUID)
+       }
+       if cfg.Verbose {
+               log.Printf("Running as admin user %q (%s)", u.Email, u.UUID)
+       }
+       cfg.CurrentUser = u
+
+       var ac struct {
+               ClusterID string
+               Login     struct {
+                       LoginCluster string
+               }
+       }
+       err = cfg.Client.RequestAndDecode(&ac, "GET", "arvados/v1/config", nil, nil)
+       if err != nil {
+               return cfg, fmt.Errorf("error getting the exported config: %s", err)
+       }
+       if ac.Login.LoginCluster != "" && ac.Login.LoginCluster != ac.ClusterID {
+               return cfg, fmt.Errorf("cannot run on a cluster other than the login cluster")
+       }
+       cfg.SysUserUUID = ac.ClusterID + "-tpzed-000000000000000"
+       cfg.AnonUserUUID = ac.ClusterID + "-tpzed-anonymouspublic"
+       cfg.ClusterID = ac.ClusterID
+
+       return cfg, nil
+}
+
+// GetUserID returns the correct user id value depending on the selector
+func GetUserID(u arvados.User, idSelector string) (string, error) {
+       switch idSelector {
+       case "email":
+               return u.Email, nil
+       case "username":
+               return u.Username, nil
+       default:
+               return "", fmt.Errorf("cannot identify user by %q selector", idSelector)
+       }
+}
+
+func doMain(cfg *ConfigParams) error {
+       // Try opening the input file early, just in case there's a problem.
+       f, err := os.Open(cfg.Path)
+       if err != nil {
+               return fmt.Errorf("error opening input file: %s", err)
+       }
+       defer f.Close()
+
+       iCaseLog := ""
+       if cfg.UserID == "username" && cfg.CaseInsensitive {
+               iCaseLog = " - username matching requested to be case-insensitive"
+       }
+       log.Printf("%s %s started. Using %q as users id%s", os.Args[0], version, cfg.UserID, iCaseLog)
+
+       allUsers := make(map[string]arvados.User)
+       userIDToUUID := make(map[string]string) // Index by email or username
+       dupedEmails := make(map[string][]arvados.User)
+       processedUsers := make(map[string]bool)
+       results, err := GetAll(cfg.Client, "users", arvados.ResourceListParams{}, &UserList{})
+       if err != nil {
+               return fmt.Errorf("error getting all users: %s", err)
+       }
+       log.Printf("Found %d users in cluster %q", len(results), cfg.ClusterID)
+       localUserUuidRegex := regexp.MustCompile(fmt.Sprintf("^%s-tpzed-[0-9a-z]{15}$", cfg.ClusterID))
+       for _, item := range results {
+               u := item.(arvados.User)
+
+               // Remote user check
+               if !localUserUuidRegex.MatchString(u.UUID) {
+                       if cfg.Verbose {
+                               log.Printf("Remote user %q (%s) won't be considered for processing", u.Email, u.UUID)
+                       }
+                       continue
+               }
+
+               // Duplicated user id check
+               uID, err := GetUserID(u, cfg.UserID)
+               if err != nil {
+                       return err
+               }
+               if uID == "" {
+                       return fmt.Errorf("%s is empty for user with uuid %q", cfg.UserID, u.UUID)
+               }
+               if cfg.CaseInsensitive {
+                       uID = strings.ToLower(uID)
+               }
+               if alreadySeenUUID, found := userIDToUUID[uID]; found {
+                       if cfg.UserID == "username" && uID != "" {
+                               return fmt.Errorf("case insensitive collision for username %q between %q and %q", uID, u.UUID, alreadySeenUUID)
+                       } else if cfg.UserID == "email" && uID != "" {
+                               log.Printf("Duplicated email %q found in user %s - ignoring", uID, u.UUID)
+                               if len(dupedEmails[uID]) == 0 {
+                                       dupedEmails[uID] = []arvados.User{allUsers[alreadySeenUUID]}
+                               }
+                               dupedEmails[uID] = append(dupedEmails[uID], u)
+                               delete(allUsers, alreadySeenUUID) // Skip even the first occurrence,
+                               // for security purposes.
+                               continue
+                       }
+               }
+               if cfg.Verbose {
+                       log.Printf("Seen user %q (%s)", uID, u.UUID)
+               }
+               userIDToUUID[uID] = u.UUID
+               allUsers[u.UUID] = u
+               processedUsers[u.UUID] = false
+       }
+
+       loadedRecords, err := LoadInputFile(f)
+       if err != nil {
+               return fmt.Errorf("reading input file %q: %s", cfg.Path, err)
+       }
+       log.Printf("Loaded %d records from input file", len(loadedRecords))
+
+       updatesSucceeded := map[string]bool{}
+       updatesFailed := map[string]bool{}
+       updatesSkipped := map[string]bool{}
+
+       for _, record := range loadedRecords {
+               if cfg.CaseInsensitive {
+                       record.UserID = strings.ToLower(record.UserID)
+               }
+               recordUUID := userIDToUUID[record.UserID]
+               processedUsers[recordUUID] = true
+               if cfg.UserID == "email" && record.UserID == cfg.CurrentUser.Email {
+                       updatesSkipped[recordUUID] = true
+                       log.Printf("Skipping current user %q (%s) from processing", record.UserID, cfg.CurrentUser.UUID)
+                       continue
+               }
+               if updated, err := ProcessRecord(cfg, record, userIDToUUID, allUsers); err != nil {
+                       log.Printf("error processing record %q: %s", record.UserID, err)
+                       updatesFailed[recordUUID] = true
+               } else if updated {
+                       updatesSucceeded[recordUUID] = true
+               }
+       }
+
+       if cfg.DeactivateUnlisted {
+               for userUUID, user := range allUsers {
+                       if shouldSkip(cfg, user) {
+                               updatesSkipped[userUUID] = true
+                               log.Printf("Skipping unlisted user %q (%s) from deactivating", user.Email, user.UUID)
+                               continue
+                       }
+                       if !processedUsers[userUUID] && allUsers[userUUID].IsActive {
+                               if cfg.Verbose {
+                                       log.Printf("Deactivating unlisted user %q (%s)", user.Username, user.UUID)
+                               }
+                               var updatedUser arvados.User
+                               if err := UnsetupUser(cfg.Client, user.UUID, &updatedUser); err != nil {
+                                       log.Printf("error deactivating unlisted user %q: %s", user.UUID, err)
+                                       updatesFailed[userUUID] = true
+                               } else {
+                                       allUsers[userUUID] = updatedUser
+                                       updatesSucceeded[userUUID] = true
+                               }
+                       }
+               }
+       }
+
+       log.Printf("User update successes: %d, skips: %d, failures: %d", len(updatesSucceeded), len(updatesSkipped), len(updatesFailed))
+
+       // Report duplicated emails detection
+       if len(dupedEmails) > 0 {
+               emails := make([]string, len(dupedEmails))
+               i := 0
+               for e := range dupedEmails {
+                       emails[i] = e
+                       i++
+               }
+               return fmt.Errorf("skipped %d duplicated email address(es) in the cluster's local user list: %v", len(dupedEmails), emails)
+       }
+
+       return nil
+}
+
+func shouldSkip(cfg *ConfigParams, user arvados.User) bool {
+       switch user.UUID {
+       case cfg.SysUserUUID, cfg.AnonUserUUID:
+               return true
+       case cfg.CurrentUser.UUID:
+               return true
+       }
+       return false
+}
+
+type userRecord struct {
+       UserID    string
+       FirstName string
+       LastName  string
+       Active    bool
+       Admin     bool
+}
+
+func needsUpdating(user arvados.User, record userRecord) bool {
+       userData := userRecord{"", user.FirstName, user.LastName, user.IsActive, user.IsAdmin}
+       recordData := userRecord{"", record.FirstName, record.LastName, record.Active, record.Admin}
+       return userData != recordData
+}
+
+// ProcessRecord creates or updates a user based on the given record
+func ProcessRecord(cfg *ConfigParams, record userRecord, userIDToUUID map[string]string, allUsers map[string]arvados.User) (bool, error) {
+       if cfg.Verbose {
+               log.Printf("Processing record for user %q", record.UserID)
+       }
+
+       wantedActiveStatus := strconv.FormatBool(record.Active)
+       wantedAdminStatus := strconv.FormatBool(record.Active && record.Admin)
+       createRequired := false
+       updateRequired := false
+       // Check if user exists, set its active & admin status.
+       var user arvados.User
+       recordUUID := userIDToUUID[record.UserID]
+       user, found := allUsers[recordUUID]
+       if !found {
+               if cfg.Verbose {
+                       log.Printf("User %q does not exist, creating", record.UserID)
+               }
+               createRequired = true
+               err := CreateUser(cfg.Client, &user, map[string]string{
+                       cfg.UserID:   record.UserID,
+                       "first_name": record.FirstName,
+                       "last_name":  record.LastName,
+                       "is_active":  wantedActiveStatus,
+                       "is_admin":   wantedAdminStatus,
+               })
+               if err != nil {
+                       return false, fmt.Errorf("error creating user %q: %s", record.UserID, err)
+               }
+       } else if needsUpdating(user, record) {
+               updateRequired = true
+               if record.Active {
+                       if !user.IsActive && cfg.Verbose {
+                               log.Printf("User %q (%s) is inactive, activating", record.UserID, user.UUID)
+                       }
+                       // Here we assume the 'setup' is done elsewhere if needed.
+                       err := UpdateUser(cfg.Client, user.UUID, &user, map[string]string{
+                               "first_name": record.FirstName,
+                               "last_name":  record.LastName,
+                               "is_active":  wantedActiveStatus,
+                               "is_admin":   wantedAdminStatus,
+                       })
+                       if err != nil {
+                               return false, fmt.Errorf("error updating user %q: %s", record.UserID, err)
+                       }
+               } else {
+                       fnChanged := user.FirstName != record.FirstName
+                       lnChanged := user.LastName != record.LastName
+                       if fnChanged || lnChanged {
+                               err := UpdateUser(cfg.Client, user.UUID, &user, map[string]string{
+                                       "first_name": record.FirstName,
+                                       "last_name":  record.LastName,
+                               })
+                               if err != nil {
+                                       return false, fmt.Errorf("error updating user %q: %s", record.UserID, err)
+                               }
+                       }
+                       if user.IsActive {
+                               if cfg.Verbose {
+                                       log.Printf("User %q is active, deactivating", record.UserID)
+                               }
+                               err := UnsetupUser(cfg.Client, user.UUID, &user)
+                               if err != nil {
+                                       return false, fmt.Errorf("error deactivating user %q: %s", record.UserID, err)
+                               }
+                       }
+               }
+       }
+       allUsers[record.UserID] = user
+       if createRequired {
+               log.Printf("Created user %q", record.UserID)
+       }
+       if updateRequired {
+               log.Printf("Updated user %q", record.UserID)
+       }
+
+       return createRequired || updateRequired, nil
+}
+
+// LoadInputFile reads the input file and returns a list of user records
+func LoadInputFile(f *os.File) (loadedRecords []userRecord, err error) {
+       lineNo := 0
+       csvReader := csv.NewReader(f)
+       loadedRecords = make([]userRecord, 0)
+
+       for {
+               record, e := csvReader.Read()
+               if e == io.EOF {
+                       break
+               }
+               lineNo++
+               if e != nil {
+                       err = fmt.Errorf("parsing error at line %d: %s", lineNo, e)
+                       return
+               }
+               if len(record) != 5 {
+                       err = fmt.Errorf("parsing error at line %d: expected 5 fields, found %d", lineNo, len(record))
+                       return
+               }
+               userID := strings.ToLower(strings.TrimSpace(record[0]))
+               firstName := strings.TrimSpace(record[1])
+               lastName := strings.TrimSpace(record[2])
+               active := strings.TrimSpace(record[3])
+               admin := strings.TrimSpace(record[4])
+               if userID == "" || firstName == "" || lastName == "" || active == "" || admin == "" {
+                       err = fmt.Errorf("parsing error at line %d: fields cannot be empty", lineNo)
+                       return
+               }
+               activeBool, err := strconv.ParseBool(active)
+               if err != nil {
+                       return nil, fmt.Errorf("parsing error at line %d: active status not recognized", lineNo)
+               }
+               adminBool, err := strconv.ParseBool(admin)
+               if err != nil {
+                       return nil, fmt.Errorf("parsing error at line %d: admin status not recognized", lineNo)
+               }
+               loadedRecords = append(loadedRecords, userRecord{
+                       UserID:    userID,
+                       FirstName: firstName,
+                       LastName:  lastName,
+                       Active:    activeBool,
+                       Admin:     adminBool,
+               })
+       }
+       return loadedRecords, nil
+}
+
+// GetAll adds all objects of type 'resource' to the 'allItems' list
+func GetAll(c *arvados.Client, res string, params arvados.ResourceListParams, page resourceList) (allItems []interface{}, err error) {
+       // Use the maximum page size the server allows
+       limit := 1<<31 - 1
+       params.Limit = &limit
+       params.Offset = 0
+       params.Order = "uuid"
+       for {
+               if err = GetResourceList(c, &page, res, params); err != nil {
+                       return allItems, err
+               }
+               // Have we finished paging?
+               if page.Len() == 0 {
+                       break
+               }
+               allItems = append(allItems, page.GetItems()...)
+               params.Offset += page.Len()
+       }
+       return allItems, nil
+}
+
+func jsonReader(rscName string, ob interface{}) io.Reader {
+       j, err := json.Marshal(ob)
+       if err != nil {
+               panic(err)
+       }
+       v := url.Values{}
+       v[rscName] = []string{string(j)}
+       return bytes.NewBufferString(v.Encode())
+}
+
+// GetResourceList fetches res list using params
+func GetResourceList(c *arvados.Client, dst *resourceList, res string, params interface{}) error {
+       return c.RequestAndDecode(dst, "GET", "/arvados/v1/"+res, nil, params)
+}
+
+// CreateUser creates a user with userData parameters, assigns it to dst
+func CreateUser(c *arvados.Client, dst *arvados.User, userData map[string]string) error {
+       return c.RequestAndDecode(dst, "POST", "/arvados/v1/users", jsonReader("user", userData), nil)
+}
+
+// UpdateUser updates a user with userData parameters
+func UpdateUser(c *arvados.Client, userUUID string, dst *arvados.User, userData map[string]string) error {
+       return c.RequestAndDecode(&dst, "PUT", "/arvados/v1/users/"+userUUID, jsonReader("user", userData), nil)
+}
+
+// UnsetupUser deactivates a user
+func UnsetupUser(c *arvados.Client, userUUID string, dst *arvados.User) error {
+       return c.RequestAndDecode(&dst, "POST", "/arvados/v1/users/"+userUUID+"/unsetup", nil, nil)
+}
diff --git a/tools/sync-users/sync-users_test.go b/tools/sync-users/sync-users_test.go
new file mode 100644 (file)
index 0000000..8b5385a
--- /dev/null
@@ -0,0 +1,436 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "fmt"
+       "io/ioutil"
+       "os"
+       "regexp"
+       "strings"
+       "testing"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       . "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+type TestSuite struct {
+       cfg   *ConfigParams
+       ac    *arvados.Client
+       users map[string]arvados.User
+}
+
+func (s *TestSuite) SetUpTest(c *C) {
+       s.ac = arvados.NewClientFromEnv()
+       u, err := s.ac.CurrentUser()
+       c.Assert(err, IsNil)
+       c.Assert(u.IsAdmin, Equals, true)
+
+       s.users = make(map[string]arvados.User)
+       ul := arvados.UserList{}
+       s.ac.RequestAndDecode(&ul, "GET", "/arvados/v1/users", nil, arvados.ResourceListParams{})
+       c.Assert(ul.ItemsAvailable, Not(Equals), 0)
+       s.users = make(map[string]arvados.User)
+       for _, u := range ul.Items {
+               s.users[u.UUID] = u
+       }
+
+       // Set up command config
+       os.Args = []string{"cmd", "somefile.csv"}
+       config, err := GetConfig()
+       c.Assert(err, IsNil)
+       s.cfg = &config
+}
+
+func (s *TestSuite) TearDownTest(c *C) {
+       var dst interface{}
+       // Reset database to fixture state after every test run.
+       err := s.cfg.Client.RequestAndDecode(&dst, "POST", "/database/reset", nil, nil)
+       c.Assert(err, IsNil)
+}
+
+var _ = Suite(&TestSuite{})
+
+// MakeTempCSVFile creates a temp file with data as comma separated values
+func MakeTempCSVFile(data [][]string) (f *os.File, err error) {
+       f, err = ioutil.TempFile("", "test_sync_users")
+       if err != nil {
+               return
+       }
+       for _, line := range data {
+               fmt.Fprintf(f, "%s\n", strings.Join(line, ","))
+       }
+       err = f.Close()
+       return
+}
+
+// RecordsToStrings formats the input data suitable for MakeTempCSVFile
+func RecordsToStrings(records []userRecord) [][]string {
+       data := [][]string{}
+       for _, u := range records {
+               data = append(data, []string{
+                       u.UserID,
+                       u.FirstName,
+                       u.LastName,
+                       fmt.Sprintf("%t", u.Active),
+                       fmt.Sprintf("%t", u.Admin)})
+       }
+       return data
+}
+
+func ListUsers(ac *arvados.Client) ([]arvados.User, error) {
+       var ul arvados.UserList
+       err := ac.RequestAndDecode(&ul, "GET", "/arvados/v1/users", nil, arvados.ResourceListParams{})
+       if err != nil {
+               return nil, err
+       }
+       return ul.Items, nil
+}
+
+func (s *TestSuite) TestParseFlagsWithoutPositionalArgument(c *C) {
+       os.Args = []string{"cmd", "-verbose"}
+       err := ParseFlags(&ConfigParams{})
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*please provide a path to an input file.*")
+}
+
+func (s *TestSuite) TestParseFlagsWrongUserID(c *C) {
+       os.Args = []string{"cmd", "-user-id=nickname", "/tmp/somefile.csv"}
+       err := ParseFlags(&ConfigParams{})
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*user ID must be one of:.*")
+}
+
+func (s *TestSuite) TestParseFlagsWithPositionalArgument(c *C) {
+       cfg := ConfigParams{}
+       os.Args = []string{"cmd", "/tmp/somefile.csv"}
+       err := ParseFlags(&cfg)
+       c.Assert(err, IsNil)
+       c.Assert(cfg.Path, Equals, "/tmp/somefile.csv")
+       c.Assert(cfg.Verbose, Equals, false)
+       c.Assert(cfg.DeactivateUnlisted, Equals, false)
+       c.Assert(cfg.UserID, Equals, "email")
+       c.Assert(cfg.CaseInsensitive, Equals, true)
+}
+
+func (s *TestSuite) TestParseFlagsWithOptionalFlags(c *C) {
+       cfg := ConfigParams{}
+       os.Args = []string{"cmd", "-verbose", "-deactivate-unlisted", "-user-id=username", "/tmp/somefile.csv"}
+       err := ParseFlags(&cfg)
+       c.Assert(err, IsNil)
+       c.Assert(cfg.Path, Equals, "/tmp/somefile.csv")
+       c.Assert(cfg.Verbose, Equals, true)
+       c.Assert(cfg.DeactivateUnlisted, Equals, true)
+       c.Assert(cfg.UserID, Equals, "username")
+       c.Assert(cfg.CaseInsensitive, Equals, false)
+}
+
+func (s *TestSuite) TestGetConfig(c *C) {
+       os.Args = []string{"cmd", "/tmp/somefile.csv"}
+       cfg, err := GetConfig()
+       c.Assert(err, IsNil)
+       c.Assert(cfg.AnonUserUUID, Not(Equals), "")
+       c.Assert(cfg.SysUserUUID, Not(Equals), "")
+       c.Assert(cfg.CurrentUser, Not(Equals), "")
+       c.Assert(cfg.ClusterID, Not(Equals), "")
+       c.Assert(cfg.Client, NotNil)
+}
+
+func (s *TestSuite) TestFailOnEmptyFields(c *C) {
+       records := [][]string{
+               {"", "first-name", "last-name", "1", "0"},
+               {"user@example", "", "last-name", "1", "0"},
+               {"user@example", "first-name", "", "1", "0"},
+               {"user@example", "first-name", "last-name", "", "0"},
+               {"user@example", "first-name", "last-name", "1", ""},
+       }
+       for _, record := range records {
+               data := [][]string{record}
+               tmpfile, err := MakeTempCSVFile(data)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*fields cannot be empty.*")
+       }
+}
+
+func (s *TestSuite) TestIgnoreSpaces(c *C) {
+       // Make sure users aren't already there from fixtures
+       for _, user := range s.users {
+               e := user.Email
+               found := e == "user1@example.com" || e == "user2@example.com" || e == "user3@example.com"
+               c.Assert(found, Equals, false)
+       }
+       // Use CSV data with leading/trailing whitespaces, confirm that they get ignored
+       data := [][]string{
+               {" user1@example.com", "  Example", "   User1", "1", "0"},
+               {"user2@example.com ", "Example  ", "User2   ", "1", "0"},
+               {" user3@example.com ", "  Example  ", "   User3   ", "1", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+       users, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       for _, userNr := range []int{1, 2, 3} {
+               found := false
+               for _, user := range users {
+                       if user.Email == fmt.Sprintf("user%d@example.com", userNr) &&
+                               user.LastName == fmt.Sprintf("User%d", userNr) &&
+                               user.FirstName == "Example" && user.IsActive == true {
+                               found = true
+                               break
+                       }
+               }
+               c.Assert(found, Equals, true)
+       }
+}
+
+// Error out when records have != 5 records
+func (s *TestSuite) TestWrongNumberOfFields(c *C) {
+       for _, testCase := range [][][]string{
+               {{"user1@example.com", "Example", "User1", "1"}},
+               {{"user1@example.com", "Example", "User1", "1", "0", "extra data"}},
+       } {
+               tmpfile, err := MakeTempCSVFile(testCase)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*expected 5 fields, found.*")
+       }
+}
+
+// Error out when records have incorrect data types
+func (s *TestSuite) TestWrongDataFields(c *C) {
+       for _, testCase := range [][][]string{
+               {{"user1@example.com", "Example", "User1", "yep", "0"}},
+               {{"user1@example.com", "Example", "User1", "1", "nope"}},
+       } {
+               tmpfile, err := MakeTempCSVFile(testCase)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*parsing error at line.*[active|admin] status not recognized.*")
+       }
+}
+
+// Create, activate and deactivate users
+func (s *TestSuite) TestUserCreationAndUpdate(c *C) {
+       for _, tc := range []string{"email", "username"} {
+               uIDPrefix := tc
+               uIDSuffix := ""
+               if tc == "email" {
+                       uIDSuffix = "@example.com"
+               }
+               s.cfg.UserID = tc
+               records := []userRecord{{
+                       UserID:    fmt.Sprintf("%suser1%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "User1",
+                       Active:    true,
+                       Admin:     false,
+               }, {
+                       UserID:    fmt.Sprintf("%suser2%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "User2",
+                       Active:    false, // initially inactive
+                       Admin:     false,
+               }, {
+                       UserID:    fmt.Sprintf("%sadmin1%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "Admin1",
+                       Active:    true,
+                       Admin:     true,
+               }, {
+                       UserID:    fmt.Sprintf("%sadmin2%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "Admin2",
+                       Active:    false, // initially inactive
+                       Admin:     true,
+               }}
+               // Make sure users aren't already there from fixtures
+               for _, user := range s.users {
+                       uID, err := GetUserID(user, s.cfg.UserID)
+                       c.Assert(err, IsNil)
+                       found := false
+                       for _, r := range records {
+                               if uID == r.UserID {
+                                       found = true
+                                       break
+                               }
+                       }
+                       c.Assert(found, Equals, false)
+               }
+               // User creation
+               tmpfile, err := MakeTempCSVFile(RecordsToStrings(records))
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, IsNil)
+
+               users, err := ListUsers(s.cfg.Client)
+               c.Assert(err, IsNil)
+               for _, r := range records {
+                       var foundUser arvados.User
+                       for _, user := range users {
+                               uID, err := GetUserID(user, s.cfg.UserID)
+                               c.Assert(err, IsNil)
+                               if uID == r.UserID {
+                                       // Add an @example.com email if missing
+                                       // (to avoid database reset errors)
+                                       if tc == "username" && user.Email == "" {
+                                               err := UpdateUser(s.cfg.Client, user.UUID, &user, map[string]string{
+                                                       "email": fmt.Sprintf("%s@example.com", user.Username),
+                                               })
+                                               c.Assert(err, IsNil)
+                                       }
+                                       foundUser = user
+                                       break
+                               }
+                       }
+                       c.Assert(foundUser, NotNil)
+                       c.Logf("Checking creation for user %q", r.UserID)
+                       c.Assert(foundUser.FirstName, Equals, r.FirstName)
+                       c.Assert(foundUser.LastName, Equals, r.LastName)
+                       c.Assert(foundUser.IsActive, Equals, r.Active)
+                       c.Assert(foundUser.IsAdmin, Equals, (r.Active && r.Admin))
+               }
+               // User update
+               for idx := range records {
+                       records[idx].Active = !records[idx].Active
+                       records[idx].FirstName = records[idx].FirstName + "Updated"
+                       records[idx].LastName = records[idx].LastName + "Updated"
+               }
+               tmpfile, err = MakeTempCSVFile(RecordsToStrings(records))
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, IsNil)
+
+               users, err = ListUsers(s.cfg.Client)
+               c.Assert(err, IsNil)
+               for _, r := range records {
+                       var foundUser arvados.User
+                       for _, user := range users {
+                               uID, err := GetUserID(user, s.cfg.UserID)
+                               c.Assert(err, IsNil)
+                               if uID == r.UserID {
+                                       foundUser = user
+                                       break
+                               }
+                       }
+                       c.Assert(foundUser, NotNil)
+                       c.Logf("Checking update for user %q", r.UserID)
+                       c.Assert(foundUser.FirstName, Equals, r.FirstName)
+                       c.Assert(foundUser.LastName, Equals, r.LastName)
+                       c.Assert(foundUser.IsActive, Equals, r.Active)
+                       c.Assert(foundUser.IsAdmin, Equals, (r.Active && r.Admin))
+               }
+       }
+}
+
+func (s *TestSuite) TestDeactivateUnlisted(c *C) {
+       localUserUuidRegex := regexp.MustCompile(fmt.Sprintf("^%s-tpzed-[0-9a-z]{15}$", s.cfg.ClusterID))
+       users, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       previouslyActiveUsers := 0
+       for _, u := range users {
+               if u.UUID == fmt.Sprintf("%s-tpzed-anonymouspublic", s.cfg.ClusterID) && !u.IsActive {
+                       // Make sure the anonymous user is active for this test
+                       var au arvados.User
+                       err := UpdateUser(s.cfg.Client, u.UUID, &au, map[string]string{"is_active": "true"})
+                       c.Assert(err, IsNil)
+                       c.Assert(au.IsActive, Equals, true)
+               }
+               if localUserUuidRegex.MatchString(u.UUID) && u.IsActive {
+                       previouslyActiveUsers++
+               }
+       }
+       // At least 3 active users: System root, Anonymous and the current user.
+       // Other active users should exist from fixture.
+       c.Logf("Initial active users count: %d", previouslyActiveUsers)
+       c.Assert(previouslyActiveUsers > 3, Equals, true)
+
+       s.cfg.DeactivateUnlisted = true
+       s.cfg.Verbose = true
+       data := [][]string{
+               {"user1@example.com", "Example", "User1", "0", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+
+       users, err = ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       currentlyActiveUsers := 0
+       acceptableActiveUUIDs := map[string]bool{
+               fmt.Sprintf("%s-tpzed-000000000000000", s.cfg.ClusterID): true,
+               fmt.Sprintf("%s-tpzed-anonymouspublic", s.cfg.ClusterID): true,
+               s.cfg.CurrentUser.UUID: true,
+       }
+       remainingActiveUUIDs := map[string]bool{}
+       seenUserEmails := map[string]bool{}
+       for _, u := range users {
+               if _, ok := seenUserEmails[u.Email]; ok {
+                       c.Errorf("Duplicated email address %q in user list (probably from fixtures). This test requires unique email addresses.", u.Email)
+               }
+               seenUserEmails[u.Email] = true
+               if localUserUuidRegex.MatchString(u.UUID) && u.IsActive {
+                       c.Logf("Found remaining active user %q (%s)", u.Email, u.UUID)
+                       _, ok := acceptableActiveUUIDs[u.UUID]
+                       c.Assert(ok, Equals, true)
+                       remainingActiveUUIDs[u.UUID] = true
+                       currentlyActiveUsers++
+               }
+       }
+       // 3 active users remaining: System root, Anonymous and the current user.
+       c.Logf("Active local users remaining: %v", remainingActiveUUIDs)
+       c.Assert(currentlyActiveUsers, Equals, 3)
+}
+
+func (s *TestSuite) TestFailOnDuplicatedEmails(c *C) {
+       for i := range []int{1, 2} {
+               isAdmin := i == 2
+               err := CreateUser(s.cfg.Client, &arvados.User{}, map[string]string{
+                       "email":      "somedupedemail@example.com",
+                       "first_name": fmt.Sprintf("Duped %d", i),
+                       "username":   fmt.Sprintf("dupedemail%d", i),
+                       "last_name":  "User",
+                       "is_active":  "true",
+                       "is_admin":   fmt.Sprintf("%t", isAdmin),
+               })
+               c.Assert(err, IsNil)
+       }
+       s.cfg.Verbose = true
+       data := [][]string{
+               {"user1@example.com", "Example", "User1", "0", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, "skipped.*duplicated email address.*")
+}