16552: Merge branch 'main'
authorTom Clegg <tom@curii.com>
Thu, 7 Jul 2022 13:42:02 +0000 (09:42 -0400)
committerTom Clegg <tom@curii.com>
Thu, 7 Jul 2022 13:42:02 +0000 (09:42 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

35 files changed:
apps/workbench/app/views/users/_show_admin.html.erb
build/run-build-packages-one-target.sh
build/run-build-packages.sh
doc/_config.yml
doc/api/keep-s3.html.textile.liquid
doc/user/topics/arvados-sync-external-sources.html.textile.liquid [moved from doc/user/topics/arvados-sync-groups.html.textile.liquid with 51% similarity]
lib/controller/integration_test.go
sdk/go/arvados/fs_base.go
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_deferred.go
sdk/go/arvados/fs_project.go
sdk/go/arvados/fs_site.go
sdk/go/arvados/fs_users.go
services/api/app/controllers/database_controller.rb
services/api/test/fixtures/users.yml
services/keep-web/s3.go
services/keep-web/s3_test.go
services/login-sync/bin/arvados-login-sync
tools/salt-install/config_examples/multi_host/aws/pillars/arvados.sls
tools/salt-install/config_examples/multi_host/aws/pillars/postgresql.sls
tools/salt-install/config_examples/multi_host/aws/states/shell_cron_add_login_sync.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/multiple_hostnames/states/host_entries.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/nginx_passenger.sls
tools/salt-install/config_examples/single_host/single_hostname/pillars/postgresql.sls
tools/salt-install/config_examples/single_host/single_hostname/states/host_entries.sls
tools/salt-install/installer.sh [new file with mode: 0755]
tools/salt-install/local.params.example.multiple_hosts
tools/salt-install/local.params.example.single_host_multiple_hostnames
tools/salt-install/local.params.example.single_host_single_hostname
tools/salt-install/provision.sh
tools/sync-users/.gitignore [new file with mode: 0644]
tools/sync-users/sync-users.go [new file with mode: 0644]
tools/sync-users/sync-users_test.go [new file with mode: 0644]

index 1da22d438fabe1609cf09857d17ec0b6bd3c9a52..b151ceff042567e6020bdcd2a827203772be8b6d 100644 (file)
@@ -6,7 +6,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
   <div class="col-md-6">
 
     <p>
-      This page enables you to <a href="https://doc.arvados.org/master/admin/user-management.html">manage users</a>.
+      This page enables you to <a href="https://doc.arvados.org/main/admin/user-management.html">manage users</a>.
     </p>
 
     <p>
@@ -22,7 +22,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
       As an admin, you can deactivate and reset this user. This will
       remove all repository/VM permissions for the user. If you
       "setup" the user again, the user will have to sign the user
-      agreement again.  You may also want to <a href="https://doc.arvados.org/master/admin/reassign-ownership.html">reassign data ownership</a>.
+      agreement again.  You may also want to <a href="https://doc.arvados.org/main/admin/reassign-ownership.html">reassign data ownership</a>.
     </p>
 
     <%= button_to "Deactivate #{@object.full_name}", unsetup_user_url(id: @object.uuid), class: 'btn btn-primary', data: {confirm: "Are you sure you want to deactivate #{@object.full_name}?"} %>
index 41b480e697b74c008add6ea1020716db12f29c6f..7d9b5b6a37abb14185a693ff331859137d7f4082 100755 (executable)
@@ -215,6 +215,7 @@ if test -z "$packages" ; then
         arvados-server
         arvados-src
         arvados-sync-groups
+        arvados-sync-users
         arvados-workbench
         arvados-workbench2
         arvados-ws
index 3e1ed6a94de866c9feda7861b08318af8ff61b9d..d4240d4f26b9120c3477aff6460a66aa3c169955 100755 (executable)
@@ -268,6 +268,8 @@ package_go_binary cmd/arvados-server arvados-ws "$FORMAT" "$ARCH" \
     "Arvados Websocket server"
 package_go_binary tools/sync-groups arvados-sync-groups "$FORMAT" "$ARCH" \
     "Synchronize remote groups into Arvados from an external source"
+package_go_binary tools/sync-users arvados-sync-users "$FORMAT" "$ARCH" \
+    "Synchronize remote users into Arvados from an external source"
 package_go_binary tools/keep-block-check keep-block-check "$FORMAT" "$ARCH" \
     "Verify that all data from one set of Keep servers to another was copied"
 package_go_binary tools/keep-rsync keep-rsync "$FORMAT" "$ARCH" \
index 7c5e6d986e49fbc3aa8f42b8ffd6945c186fc94b..d2bb7e797582a8c2a98c850face5442b9e07bfdb 100644 (file)
@@ -177,7 +177,7 @@ navbar:
       - admin/federation.html.textile.liquid
       - admin/merge-remote-account.html.textile.liquid
       - admin/migrating-providers.html.textile.liquid
-      - user/topics/arvados-sync-groups.html.textile.liquid
+      - user/topics/arvados-sync-external-sources.html.textile.liquid
       - admin/scoped-tokens.html.textile.liquid
       - admin/token-expiration-policy.html.textile.liquid
       - admin/user-activity.html.textile.liquid
index bee91516bc12fc61e87a51b603361372ad64e358..1fa186e4e7c66bad3f8400a988bd982a69ff7c86 100644 (file)
@@ -70,6 +70,24 @@ h4. GetBucketVersioning
 
 Bucket versioning is presently not supported, so this will always respond that bucket versioning is not enabled.
 
+h3. Accessing collection/project properties as metadata
+
+GetObject, HeadObject, and HeadBucket return Arvados object properties as S3 metadata headers, e.g., @X-Amz-Meta-Foo: bar@.
+
+If the requested path indicates a file or directory placeholder inside a collection, or the top level of a collection, GetObject and HeadObject return the collection properties.
+
+If the requested path indicates a directory placeholder corresponding to a project, GetObject and HeadObject return the properties of the project.
+
+HeadBucket returns the properties of the collection or project corresponding to the bucket name.
+
+Non-string property values are returned in a JSON representation, e.g., @["foo","bar"]@.
+
+As in Amazon S3, property values containing non-ASCII characters are returned in BASE64-encoded form as described in RFC 2047, e.g., @=?UTF-8?b?4pu1?=@.
+
+GetBucketTagging and GetObjectTagging APIs are _not_ supported.
+
+It is not possible to modify collection or project properties using the S3 API.
+
 h3. Authorization mechanisms
 
 Keep-web accepts AWS Signature Version 4 (AWS4-HMAC-SHA256) as well as the older V2 AWS signature.
similarity index 51%
rename from doc/user/topics/arvados-sync-groups.html.textile.liquid
rename to doc/user/topics/arvados-sync-external-sources.html.textile.liquid
index 1f7eede4bb14650a862e1276cf1b8bccbc05e429..0ec0098f053aa0b4d53c5a133bc00c1ed2325f58 100644 (file)
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: admin
-title: "Synchronizing external groups"
+title: "Synchronizing from external sources"
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,7 +9,51 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-The @arvados-sync-groups@ tool allows to synchronize groups in Arvados from an external source.
+The @arvados-sync-users@ and @arvados-sync-groups@ tools allow you to manage Arvados users and groups from external sources.
+
+These tools are designed to be run periodically, reading a file created by a dump script for a remote auth system (e.g., LDAP) and applying the file's contents as the source of truth.
+
+bq. NOTE: Both tools need to perform several administrative tasks on Arvados, so must be run using a superuser token via @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables or @~/.config/arvados/settings.conf@ file.
+
+h1. Using arvados-sync-users
+
+This tool reads a CSV (comma-separated values) file having information about user accounts and their expected state on Arvados.
+
+Every line on the file should have 5 fields:
+
+# A user identifier: it could be an email address (default) or a username.
+# The user's first name.
+# The user's last name.
+# The intended user's active state.
+# The intended user's admin state: will always be read as @false@ when @active=false@.
+
+The last 2 fields should be represented as @true@/@false@, @yes@/@no@, or @1@/@0@ values.
+
+h2. Options
+
+The following command line options are supported:
+
+table(table table-bordered table-condensed).
+|_. Option |_. Description |
+|==--help==|This list of options|
+|==--case-insensitive==|Uses case-insensitive username matching|
+|==--deactivate-unlisted==|Deactivate users that aren't listed on the input file. (Current & system users won't be affected)|
+|==--user-id==|Identifier to use in looking up user. One of 'email' or 'username' (Default: 'email')|
+|==--verbose==|Log informational messages|
+|==--version==|Print version and exit|
+
+The tool will create users when needed, and update those existing records to match the desired state described by the fields on the CSV file.
+System users such as root and anonymous are unaffected by this tool.
+In the case of a @LoginCluster@ federation, this tool should be run on the cluster that manages the user accounts, and will fail otherwise.
+
+h2. Example
+
+To sync users using the username to identify every account, reading from some @external_users.csv@ file and deactivating existing users that aren't included in it, the command should be called as follows:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arvados-sync-users --deactivate-unlisted --user-id username /path/to/external_users.csv </span>
+</code></pre>
+</notextile>
 
 h1. Using arvados-sync-groups
 
@@ -21,11 +65,6 @@ Users can be identified by their email address or username: the tool will check
 
 Permission level can be one of the following: @can_read@, @can_write@ or @can_manage@, giving the group member read, read/write or managing privileges on the group. For backwards compatibility purposes, if any record omits the third (permission) field, it will default to @can_write@ permission. You can read more about permissions on the "group management admin guide":{{ site.baseurl }}/admin/group-management.html.
 
-This tool is designed to be run periodically reading a file created by a remote auth system (ie: LDAP) dump script, applying what's included on the file as the source of truth.
-
-
-bq. NOTE: @arvados-sync-groups@ needs to perform several administrative tasks on Arvados, so must be run using a superuser token
-
 h2. Options
 
 The following command line options are supported:
index 67d60197e7e160b5f7d99e510106a55a18ffd07f..b0ec4293a38acfdf6a349db48fff96c7bf3f7a1a 100644 (file)
@@ -1126,7 +1126,7 @@ func (s *IntegrationSuite) TestForwardRuntimeTokenToLoginCluster(c *check.C) {
 }
 
 func (s *IntegrationSuite) TestRunTrivialContainer(c *check.C) {
-       outcoll := s.runContainer(c, "z1111", map[string]interface{}{
+       outcoll, _ := s.runContainer(c, "z1111", "", map[string]interface{}{
                "command":             []string{"sh", "-c", "touch \"/out/hello world\" /out/ohai"},
                "container_image":     "busybox:uclibc",
                "cwd":                 "/tmp",
@@ -1141,10 +1141,49 @@ func (s *IntegrationSuite) TestRunTrivialContainer(c *check.C) {
        c.Check(outcoll.PortableDataHash, check.Equals, "8fa5dee9231a724d7cf377c5a2f4907c+65")
 }
 
-func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec map[string]interface{}, expectExitCode int) arvados.Collection {
+func (s *IntegrationSuite) TestContainerInputOnDifferentCluster(c *check.C) { // runs a container on z2222 whose input collection was created on z1111
+       conn := s.super.Conn("z1111")
+       rootctx, _, _ := s.super.RootClients("z1111")
+       userctx, ac, _, _ := s.super.UserClients("z1111", rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+       z1coll, err := conn.CollectionCreate(userctx, arvados.CreateOptions{Attrs: map[string]interface{}{
+               "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:ocelot\n", // a single zero-length file named "ocelot"
+       }})
+       c.Assert(err, check.IsNil)
+
+       outcoll, logcfs := s.runContainer(c, "z2222", ac.AuthToken, map[string]interface{}{ // submit to z2222 with the token obtained via z1111
+               "command":         []string{"ls", "/in"},
+               "container_image": "busybox:uclibc",
+               "cwd":             "/tmp",
+               "environment":     map[string]string{},
+               "mounts": map[string]arvados.Mount{
+                       "/in":  {Kind: "collection", PortableDataHash: z1coll.PortableDataHash}, // input is referenced by portable data hash
+                       "/out": {Kind: "tmp", Capacity: 10000},
+               },
+               "output_path":         "/out",
+               "runtime_constraints": arvados.RuntimeConstraints{RAM: 100000000, VCPUs: 1},
+               "priority":            1,
+               "state":               arvados.ContainerRequestStateCommitted,
+               "container_count_max": 1,
+       }, -1) // -1: runContainer gates its state/exit-code checks and output fetch on expectExitCode >= 0
+       if outcoll.UUID == "" { // output collection was not populated
+               arvmountlog, err := fs.ReadFile(arvados.FS(logcfs), "/arv-mount.txt")
+               c.Check(err, check.IsNil)
+               c.Check(string(arvmountlog), check.Matches, `(?ms).*cannot use a locally issued token to forward a request to our login cluster \(z1111\).*`)
+               c.Skip("this use case is not supported yet") // known limitation; NOTE(review): with -1 above this branch is presumably always taken for now -- confirm
+       }
+       stdout, err := fs.ReadFile(arvados.FS(logcfs), "/stdout.txt")
+       c.Check(err, check.IsNil)
+       c.Check(string(stdout), check.Equals, "ocelot\n") // "ls /in" should list the one input file
+}
+
+func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token string, ctrSpec map[string]interface{}, expectExitCode int) (outcoll arvados.Collection, logcfs arvados.CollectionFileSystem) {
        conn := s.super.Conn(clusterID)
        rootctx, _, _ := s.super.RootClients(clusterID)
-       _, ac, kc, _ := s.super.UserClients(clusterID, rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+       if token == "" {
+               _, ac, _, _ := s.super.UserClients(clusterID, rootctx, c, conn, s.oidcprovider.AuthEmail, true)
+               token = ac.AuthToken
+       }
+       _, ac, kc := s.super.ClientsWithToken(clusterID, token)
 
        c.Log("[docker load]")
        out, err := exec.Command("docker", "load", "--input", arvadostest.BusyboxDockerImage(c)).CombinedOutput()
@@ -1164,7 +1203,7 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec ma
        })
        c.Assert(err, check.IsNil)
 
-       showlogs := func(collectionID string) {
+       showlogs := func(collectionID string) arvados.CollectionFileSystem {
                var logcoll arvados.Collection
                err = ac.RequestAndDecode(&logcoll, "GET", "/arvados/v1/collections/"+collectionID, nil, nil)
                c.Assert(err, check.IsNil)
@@ -1182,42 +1221,36 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, ctrSpec ma
                        c.Logf("=== %s\n%s\n", path, buf)
                        return nil
                })
+               return cfs
        }
 
        var ctr arvados.Container
        var lastState arvados.ContainerState
        deadline := time.Now().Add(time.Minute)
-wait:
-       for ; ; lastState = ctr.State {
-               err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
+       for cr.State != arvados.ContainerRequestStateFinal {
+               err = ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
                c.Assert(err, check.IsNil)
-               switch ctr.State {
-               case lastState:
+               err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
+               if err != nil {
+                       c.Logf("error getting container state: %s", err)
+               } else if ctr.State != lastState {
+                       c.Logf("container state changed to %q", ctr.State)
+                       lastState = ctr.State
+               } else {
                        if time.Now().After(deadline) {
-                               c.Errorf("timed out, container request state is %q", cr.State)
+                               c.Errorf("timed out, container state is %q", cr.State)
                                showlogs(ctr.Log)
                                c.FailNow()
                        }
                        time.Sleep(time.Second / 2)
-               case arvados.ContainerStateComplete:
-                       break wait
-               case arvados.ContainerStateQueued, arvados.ContainerStateLocked, arvados.ContainerStateRunning:
-                       c.Logf("container state changed to %q", ctr.State)
-               default:
-                       c.Errorf("unexpected container state %q", ctr.State)
-                       showlogs(ctr.Log)
-                       c.FailNow()
                }
        }
-       c.Check(ctr.ExitCode, check.Equals, 0)
-
-       err = ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
-       c.Assert(err, check.IsNil)
-
-       showlogs(cr.LogUUID)
-
-       var outcoll arvados.Collection
-       err = ac.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
-       c.Assert(err, check.IsNil)
-       return outcoll
+       if expectExitCode >= 0 {
+               c.Check(ctr.State, check.Equals, arvados.ContainerStateComplete)
+               c.Check(ctr.ExitCode, check.Equals, expectExitCode)
+               err = ac.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
+               c.Assert(err, check.IsNil)
+       }
+       logcfs = showlogs(cr.LogUUID)
+       return outcoll, logcfs
 }
index ce9253ab3d4f5d5447273cfe02edca716afc52fd..2ad4d1f859f1141035c04cb4180c5ef623d1fa04 100644 (file)
@@ -234,6 +234,14 @@ type fileinfo struct {
        mode    os.FileMode
        size    int64
        modTime time.Time
+       // If not nil, sys() returns the source data structure, which
+       // can be a *Collection, *Group, or nil. Currently populated
+       // only for project dirs and top-level collection dirs. Does
+       // not stay up to date with upstream changes.
+       //
+       // Intended to support keep-web's properties-as-s3-metadata
+       // feature (https://dev.arvados.org/issues/19088).
+       sys func() interface{}
 }
 
 // Name implements os.FileInfo.
@@ -261,9 +269,12 @@ func (fi fileinfo) Size() int64 {
        return fi.size
 }
 
-// Sys implements os.FileInfo.
+// Sys implements os.FileInfo. See comment in fileinfo struct.
 func (fi fileinfo) Sys() interface{} {
-       return nil
+       if fi.sys == nil {
+               return nil
+       }
+       return fi.sys()
 }
 
 type nullnode struct{}
index ccfbdc4da262c13ee3d319ad072f73a10b9b1d0a..26012e240603d0be43a1019346c4e946e2821790 100644 (file)
@@ -85,6 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
                                name:    ".",
                                mode:    os.ModeDir | 0755,
                                modTime: modTime,
+                               sys:     func() interface{} { return c },
                        },
                        inodes: make(map[string]inode),
                },
index 66a126a39c12a45620a93c59a9047ac3d4ae1fe8..1dfa2df6e4005f0b6d93f497a657e81e583bad14 100644 (file)
@@ -24,6 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
                        name:    coll.Name,
                        modTime: modTime,
                        mode:    0755 | os.ModeDir,
+                       sys:     func() interface{} { return &coll },
                },
        }
        return &deferrednode{wrapped: placeholder, create: func() inode {
index 380fb9c6d5f2f8dac636b06c90df14973c0adb36..bea1f76e24f24faffa38fbccd7b6b880ffb2d22e 100644 (file)
@@ -38,6 +38,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
                                {"uuid", "is_a", []string{"arvados#collection", "arvados#group"}},
                                {"groups.group_class", "=", "project"},
                        },
+                       Select: []string{"uuid", "name", "modified_at", "properties"},
                })
                if err != nil {
                        return nil, err
@@ -63,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
        if strings.Contains(coll.UUID, "-j7d0g-") {
                // Group item was loaded into a Collection var -- but
                // we only need the Name and UUID anyway, so it's OK.
-               return fs.newProjectNode(parent, coll.Name, coll.UUID), nil
+               return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil
        } else if strings.Contains(coll.UUID, "-4zz18-") {
                return deferredCollectionFS(fs, parent, coll), nil
        } else {
@@ -98,6 +99,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                        Count:   "none",
                        Filters: filters,
                        Order:   "uuid",
+                       Select:  []string{"uuid", "name", "modified_at", "properties"},
                }
 
                for {
@@ -121,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
                                        continue
                                }
                                if strings.Contains(i.UUID, "-j7d0g-") {
-                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID))
+                                       inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{
+                                               UUID:       i.UUID,
+                                               Name:       i.Name,
+                                               ModifiedAt: i.ModifiedAt,
+                                               Properties: i.Properties,
+                                       }))
                                } else if strings.Contains(i.UUID, "-4zz18-") {
                                        inodes = append(inodes, deferredCollectionFS(fs, parent, i))
                                } else {
index 3892be1e9a97610522a1fc219d5c0fb807788e4c..bb2eee77925fd2c682c7d42e1e8e175c4f2f1489 100644 (file)
@@ -77,7 +77,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) {
        fs.root.treenode.Lock()
        defer fs.root.treenode.Unlock()
        fs.root.treenode.Child(mount, func(inode) (inode, error) {
-               return fs.newProjectNode(fs.root, mount, uuid), nil
+               return fs.newProjectNode(fs.root, mount, uuid, nil), nil
        })
 }
 
@@ -140,7 +140,7 @@ func (fs *customFileSystem) mountByID(parent inode, id string) inode {
        if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) {
                return fs.mountCollection(parent, id)
        } else if strings.Contains(id, "-j7d0g-") {
-               return fs.newProjectNode(fs.root, id, id)
+               return fs.newProjectNode(fs.root, id, id, nil)
        } else {
                return nil
        }
@@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
        return cfs
 }
 
-func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode {
+func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode {
+       var projLoading sync.Mutex
        return &lookupnode{
                stale:   fs.Stale,
                loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
@@ -174,6 +175,20 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
                                name:    name,
                                modTime: time.Now(),
                                mode:    0755 | os.ModeDir,
+                               sys: func() interface{} {
+                                       projLoading.Lock()
+                                       defer projLoading.Unlock()
+                                       if proj != nil {
+                                               return proj
+                                       }
+                                       var g Group
+                                       err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil)
+                                       if err != nil {
+                                               return err
+                                       }
+                                       proj = &g
+                                       return proj
+                               },
                        },
                },
        }
index 00f70369694430f70f0cca185270ceb905e34c01..ae47414b7abe80b9c0e2a2ff0a5e7c36d7320dd2 100644 (file)
@@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro
                return nil, os.ErrNotExist
        }
        user := resp.Items[0]
-       return fs.newProjectNode(parent, user.Username, user.UUID), nil
+       return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil
 }
 
 func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
@@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
                        if user.Username == "" {
                                continue
                        }
-                       inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID))
+                       inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil))
                }
                params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}}
        }
index fa1e1ca43c64dc0b98a0587e703f0a075e890dae..69453959d262a792b7f09edca6b6557e8a5d8a4b 100644 (file)
@@ -25,7 +25,7 @@ class DatabaseController < ApplicationController
     unexpected_uuids = user_uuids - fixture_uuids
     if unexpected_uuids.any?
       logger.error("Running in test environment, but non-fixture users exist: " +
-                   "#{unexpected_uuids}")
+                   "#{unexpected_uuids}" + "\nMaybe test users without @example.com email addresses were created?")
       raise ArvadosModel::PermissionDeniedError
     end
 
index 14630d9efa85615a09585082299290b71def8530..1d9bcbb040ab7c5dd955361704da7b9a81a3147b 100644 (file)
@@ -12,6 +12,7 @@ system_user:
   modified_by_user_uuid: zzzzz-tpzed-000000000000000
   modified_at: 2014-11-27 06:38:21.208036000 Z
   email: root
+  username: root
   first_name: root
   last_name: ''
   identity_url:
@@ -171,7 +172,7 @@ spectator:
 container_runtime_token_user:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-l3skomkti0c4vg4
-  email: spectator@arvados.local
+  email: container_runtime_token_user@arvados.local
   first_name: Spect
   last_name: Ator
   identity_url: https://container_runtime_token_user.openid.local
@@ -193,6 +194,7 @@ inactive_uninvited:
   identity_url: https://inactive-uninvited-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveuninvited
   prefs: {}
 
 inactive:
@@ -216,6 +218,7 @@ inactive_but_signed_user_agreement:
   identity_url: https://inactive-but-agreeable-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveusersignedua
   prefs:
     profile:
       organization: example.com
@@ -230,6 +233,7 @@ anonymous:
   last_name: anonymouspublic
   is_active: false
   is_admin: false
+  username: anonymous
   prefs: {}
 
 job_reader:
@@ -273,17 +277,19 @@ active_no_prefs:
   identity_url: https://active_no_prefs.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefs
   prefs: {}
 
 active_no_prefs_profile_no_getting_started_shown:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-a46c98d1td4aoj4
-  email: active_no_prefs_profile@arvados.local
+  email: active_no_prefs_profile_no_gs@arvados.local
   first_name: HasPrefs
   last_name: NoProfile
   identity_url: https://active_no_prefs_profile.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefsprofilenogs
   prefs:
     test: abc
 
@@ -296,6 +302,7 @@ active_no_prefs_profile_with_getting_started_shown:
   identity_url: https://active_no_prefs_profile_seen_gs.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefsprofile
   prefs:
     test: abc
     getting_started_shown: 2015-03-26 12:34:56.789000000 Z
@@ -308,6 +315,7 @@ active_with_prefs_profile_no_getting_started_shown:
   last_name: NoGettingStartedShown
   identity_url: https://active_nogettinstarted.openid.local
   is_active: true
+  username: activenogettinstarted
   prefs:
     profile:
       organization: example.com
@@ -372,7 +380,7 @@ fuse:
 permission_perftest:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-permissionptest
-  email: fuse@arvados.local
+  email: permission_perftest@arvados.local
   first_name: FUSE
   last_name: User
   identity_url: https://permission_perftest.openid.local
@@ -431,4 +439,4 @@ has_can_login_permission:
   is_active: true
   is_admin: false
   modified_at: 2015-03-26 12:34:56.789000000 Z
-  username: can-login-user
+  username: canLoginUser
index 59ab3cd4389c0cc57f9a923983ee58e12bca5184..90b75f8a306019c2b646d15228da0c1c54a62956 100644 (file)
@@ -8,12 +8,15 @@ import (
        "crypto/hmac"
        "crypto/sha256"
        "encoding/base64"
+       "encoding/json"
        "encoding/xml"
        "errors"
        "fmt"
        "hash"
        "io"
+       "mime"
        "net/http"
+       "net/textproto"
        "net/url"
        "os"
        "path/filepath"
@@ -385,6 +388,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                if r.Method == "HEAD" && !objectNameGiven {
                        // HeadBucket
                        if err == nil && fi.IsDir() {
+                               err = setFileInfoHeaders(w.Header(), fs, fspath)
+                               if err != nil {
+                                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                                       return true
+                               }
                                w.WriteHeader(http.StatusOK)
                        } else if os.IsNotExist(err) {
                                s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -394,6 +402,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                        return true
                }
                if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
+                       err = setFileInfoHeaders(w.Header(), fs, fspath)
+                       if err != nil {
+                               s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                               return true
+                       }
                        w.Header().Set("Content-Type", "application/x-directory")
                        w.WriteHeader(http.StatusOK)
                        return true
@@ -415,6 +428,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
                // shallow copy r, and change URL path
                r := *r
                r.URL.Path = fspath
+               err = setFileInfoHeaders(w.Header(), fs, fspath)
+               if err != nil {
+                       s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+                       return true
+               }
                http.FileServer(fs).ServeHTTP(w, &r)
                return true
        case r.Method == http.MethodPut:
@@ -586,6 +604,60 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
        }
 }
 
+// setFileInfoHeaders copies the properties of the collection or
+// project that contains path into header, as x-amz-meta-* entries.
+//
+// Starting at path (with any trailing slash removed) and moving up
+// one path component at a time, it stats each candidate until it
+// finds one whose Sys() value is an *arvados.Collection or an
+// *arvados.Group. If the top of the path is reached without finding
+// one, header is left untouched and nil is returned.
+//
+// Property keys rejected by validMIMEHeaderKey are skipped. String
+// values are used as-is and all other values are JSON-encoded; values
+// that cannot be JSON-encoded are silently omitted. Any value
+// containing a character above U+007F is MIME B-encoded.
+//
+// An error is returned if Stat fails, or if a Sys() value is itself
+// an error.
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
+       // maybeEncode returns s unchanged if it is pure ASCII, or
+       // as a MIME B-encoded word otherwise.
+       maybeEncode := func(s string) string {
+               nonASCII := func(r rune) bool { return r > '\u007f' }
+               if strings.IndexFunc(s, nonASCII) < 0 {
+                       return s
+               }
+               return mime.BEncoding.Encode("UTF-8", s)
+       }
+
+       var props map[string]interface{}
+       target := strings.TrimSuffix(path, "/")
+       for found := false; !found; {
+               info, err := fs.Stat(target)
+               if err != nil {
+                       return err
+               }
+               switch sys := info.Sys().(type) {
+               case *arvados.Collection:
+                       props, found = sys.Properties, true
+               case *arvados.Group:
+                       props, found = sys.Properties, true
+               case error:
+                       return sys
+               default:
+                       // Try parent
+                       slash := strings.LastIndexByte(target, '/')
+                       if slash < 0 {
+                               return nil
+                       }
+                       target = target[:slash]
+               }
+       }
+
+       for key, val := range props {
+               if !validMIMEHeaderKey(key) {
+                       continue
+               }
+               var text string
+               if str, isString := val.(string); isString {
+                       text = str
+               } else {
+                       encoded, err := json.Marshal(val)
+                       if err != nil {
+                               // Best effort: skip values that
+                               // cannot be represented.
+                               continue
+                       }
+                       text = string(encoded)
+               }
+               header.Set("x-amz-meta-"+key, maybeEncode(text))
+       }
+       return nil
+}
+
+func validMIMEHeaderKey(k string) bool {
+       check := "z-" + k
+       return check != textproto.CanonicalMIMEHeaderKey(check)
+}
+
 // Call fn on the given path (directory) and its contents, in
 // lexicographic order.
 //
index 261ebb5741388a87a618d7168936e4292052a6c3..a99f3c278f6214b5764f853920c10539f7757ffe 100644 (file)
@@ -11,6 +11,7 @@ import (
        "crypto/sha256"
        "fmt"
        "io/ioutil"
+       "mime"
        "net/http"
        "net/http/httptest"
        "net/url"
@@ -39,12 +40,13 @@ type s3stage struct {
        kc         *keepclient.KeepClient
        proj       arvados.Group
        projbucket *s3.Bucket
+       subproj    arvados.Group
        coll       arvados.Collection
        collbucket *s3.Bucket
 }
 
 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
-       var proj arvados.Group
+       var proj, subproj arvados.Group
        var coll arvados.Collection
        arv := arvados.NewClientFromEnv()
        arv.AuthToken = arvadostest.ActiveToken
@@ -52,14 +54,35 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                "group": map[string]interface{}{
                        "group_class": "project",
                        "name":        "keep-web s3 test",
+                       "properties": map[string]interface{}{
+                               "project-properties-key": "project properties value",
+                       },
                },
                "ensure_unique_name": true,
        })
        c.Assert(err, check.IsNil)
+       err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+               "group": map[string]interface{}{
+                       "owner_uuid":  proj.UUID,
+                       "group_class": "project",
+                       "name":        "keep-web s3 test subproject",
+                       "properties": map[string]interface{}{
+                               "subproject_properties_key": "subproject properties value",
+                               "invalid header key":        "this value will not be returned because key contains spaces",
+                       },
+               },
+       })
+       c.Assert(err, check.IsNil)
        err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
                "owner_uuid":    proj.UUID,
                "name":          "keep-web s3 test collection",
                "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+               "properties": map[string]interface{}{
+                       "string":   "string value",
+                       "array":    []string{"element1", "element2"},
+                       "object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
+                       "nonascii": "⛵",
+               },
        }})
        c.Assert(err, check.IsNil)
        ac, err := arvadosclient.New(arv)
@@ -95,7 +118,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
                        S3:   client,
                        Name: proj.UUID,
                },
-               coll: coll,
+               subproj: subproj,
+               coll:    coll,
                collbucket: &s3.Bucket{
                        S3:   client,
                        Name: coll.UUID,
@@ -215,6 +239,73 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix
        c.Check(exists, check.Equals, true)
 }
 
+// checkMetaEquals checks that the X-Amz-Meta-* entries in hdr, with
+// that prefix removed from their names, are exactly the entries in
+// expect. Header names that carry more or fewer than one value are
+// not considered.
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
+       const metaPrefix = "X-Amz-Meta-"
+       got := map[string]string{}
+       for name, values := range hdr {
+               if len(values) != 1 || !strings.HasPrefix(name, metaPrefix) {
+                       continue
+               }
+               got[name[len(metaPrefix):]] = values[0]
+       }
+       c.Check(got, check.DeepEquals, expect)
+}
+
+func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
+       stage := s.s3setup(c)
+       defer stage.teardown(c)
+
+       expectCollectionTags := map[string]string{
+               "String":   "string value",
+               "Array":    `["element1","element2"]`,
+               "Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
+               "Nonascii": "=?UTF-8?b?4pu1?=",
+       }
+       expectSubprojectTags := map[string]string{
+               "Subproject_properties_key": "subproject properties value",
+       }
+       expectProjectTags := map[string]string{
+               "Project-Properties-Key": "project properties value",
+       }
+
+       c.Log("HEAD object with metadata from collection")
+       resp, err := stage.collbucket.Head("sailboat.txt", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("GET object with metadata from collection")
+       rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
+       c.Assert(err, check.IsNil)
+       content, err := ioutil.ReadAll(rdr)
+       c.Check(err, check.IsNil)
+       rdr.Close()
+       c.Check(content, check.HasLen, 4)
+       s.checkMetaEquals(c, hdr, expectCollectionTags)
+
+       c.Log("HEAD bucket with metadata from collection")
+       resp, err = stage.collbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD directory placeholder with metadata from collection")
+       resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD file with metadata from collection")
+       resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+       c.Log("HEAD directory placeholder with metadata from subproject")
+       resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
+
+       c.Log("HEAD bucket with metadata from project")
+       resp, err = stage.projbucket.Head("/", nil)
+       c.Assert(err, check.IsNil)
+       s.checkMetaEquals(c, resp.Header, expectProjectTags)
+}
+
 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
        stage := s.s3setup(c)
        defer stage.teardown(c)
index da8a21efa37c8a8db91b925bc56040f7bff494b8..5c6691ab95279920498875a7e49295c8a2b4a5a4 100755 (executable)
@@ -10,6 +10,7 @@ require 'etc'
 require 'fileutils'
 require 'yaml'
 require 'optparse'
+require 'open3'
 
 req_envs = %w(ARVADOS_API_HOST ARVADOS_API_TOKEN ARVADOS_VIRTUAL_MACHINE_UUID)
 req_envs.each do |k|
@@ -124,11 +125,12 @@ begin
     unless pwnam[l[:username]]
       STDERR.puts "Creating account #{l[:username]}"
       # Create new user
-      unless system("useradd", "-m",
+      out, st = Open3.capture2e("useradd", "-m",
                 "-c", username,
                 "-s", "/bin/bash",
                 username)
-        STDERR.puts "Account creation failed for #{l[:username]}: #{$?}"
+      if st.exitstatus != 0
+        STDERR.puts "Account creation failed for #{l[:username]}:\n#{out}"
         next
       end
       begin
@@ -150,7 +152,10 @@ begin
       if existing_groups.index(addgroup).nil?
         # User should be in group, but isn't, so add them.
         STDERR.puts "Add user #{username} to #{addgroup} group"
-        system("usermod", "-aG", addgroup, username)
+        out, st = Open3.capture2e("usermod", "-aG", addgroup, username)
+        if st.exitstatus != 0
+          STDERR.puts "Failed to add #{username} to #{addgroup} group:\n#{out}"
+        end
       end
     end
 
@@ -158,7 +163,10 @@ begin
       if groups.index(removegroup).nil?
         # User is in a group, but shouldn't be, so remove them.
         STDERR.puts "Remove user #{username} from #{removegroup} group"
-        system("gpasswd", "-d", username, removegroup)
+        out, st = Open3.capture2e("gpasswd", "-d", username, removegroup)
+        if st.exitstatus != 0
+          STDERR.puts "Failed to remove user #{username} from #{removegroup} group:\n#{out}"
+        end
       end
     end
 
index f41b6ac5b36d45fc1e4ff0503c2072890c22a81a..02653082f30cafb75c03d28764d7b9115aab9214 100644 (file)
@@ -93,7 +93,7 @@ arvados:
     resources:
       virtual_machines:
         shell:
-          name: shell
+          name: shell.__CLUSTER__.__DOMAIN__
           backend: __SHELL_INT_IP__
           port: 4200
 
index e06ddd041c9acb4d01a1bab8a3deb8de6253f287..d6320da24651612e760178fa598bdd0fb6353b83 100644 (file)
@@ -19,7 +19,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
index 86c591e97ed3679eb8687fddb19939839e63ad92..9028b9b1001f2f297d170f4d882574d42b875548 100644 (file)
@@ -75,6 +75,13 @@ extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_virtual_machine_uuid_cron_e
     - onlyif:
       - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
 
+extra_shell_cron_add_login_sync_add_{{ vm }}_sbin_to_path_cron_env_present:
+  cron.env_present:
+    - name: PATH
+    - value: "/bin:/usr/bin:/usr/sbin"
+    - onlyif:
+      - /bin/grep -qE "[a-z0-9]{5}-2x53u-[a-z0-9]{15}" /tmp/vm_uuid_{{ vm }}
+
 extra_shell_cron_add_login_sync_add_{{ vm }}_arvados_login_sync_cron_present:
   cron.present:
     - name: /usr/local/bin/arvados-login-sync
index dfddf3b62361ae66305f178bd3c0a84436a50082..cf087797159077c42334f4c93fe3df54a238906a 100644 (file)
@@ -55,7 +55,7 @@ nginx:
       - add_header: 'Strict-Transport-Security "max-age=63072000" always'
 
       # OCSP stapling
-      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # NOTE! Stapling does not work with self-signed certificates, so disabling for tests
       # - ssl_stapling: 'on'
       # - ssl_stapling_verify: 'on'
 
index f3bc09f65036c7349e8f9f9fa1cd21746c25cdec..edb961ebaaeccca0899d0c2633ca7c0957369805 100644 (file)
@@ -38,7 +38,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
index 379f4765cb0aa88689f31d99bf5c03ea84d5e560..c2d34ea28c1dd2c0551473eac8943973d5183804 100644 (file)
@@ -12,7 +12,7 @@ arvados_test_salt_states_examples_single_host_etc_hosts_host_present:
     - ip: 127.0.1.1
     - names:
       - {{ arvados.cluster.name }}.{{ arvados.cluster.domain }}
-      # FIXME! This just works for our testings.
+      # NOTE! This just works for our testings.
       # Won't work if the cluster name != host name
       {%- for entry in [
           'api',
index 21c1510de8aa36a153d76d2c7bd8ee8ae44d4cd2..26e2baf0446b861fccadfd466da74414c9c77856 100644 (file)
@@ -55,7 +55,7 @@ nginx:
       - add_header: 'Strict-Transport-Security "max-age=63072000" always'
 
       # OCSP stapling
-      # FIXME! Stapling does not work with self-signed certificates, so disabling for tests
+      # NOTE! Stapling does not work with self-signed certificates, so disabling for tests
       # - ssl_stapling: 'on'
       # - ssl_stapling_verify: 'on'
 
index a69b88cb173aa4f72e17343997c65d072456b9b9..14452a990541bf47fee379a33345895f6652cbd8 100644 (file)
@@ -40,7 +40,7 @@ postgres:
   users:
     __CLUSTER___arvados:
       ensure: present
-      password: __DATABASE_PASSWORD__
+      password: "__DATABASE_PASSWORD__"
 
   # tablespaces:
   #   arvados_tablespace:
index a688f4f8c11535fdcaaac7b33eaaccf5cddd16c9..51308fffa2c75445df9cd41287675a4a8d4aaedd 100644 (file)
@@ -21,7 +21,7 @@ arvados_test_salt_states_examples_single_host_etc_hosts_host_present:
     - ip: 127.0.1.1
     - names:
       - {{ arvados.cluster.name }}.{{ arvados.cluster.domain }}
-      # FIXME! This just works for our testing.
+      # NOTE! This just works for our testing.
       # Won't work if the cluster name != host name
       {%- for entry in [
           'api',
diff --git a/tools/salt-install/installer.sh b/tools/salt-install/installer.sh
new file mode 100755 (executable)
index 0000000..e5ff7be
--- /dev/null
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: CC-BY-SA-3.0
+
+#
+# installer.sh
+#
+# Helps manage the configuration in a git repository, and then deploy
+# nodes by pushing a copy of the git repository to each node and
+# running the provision script to do the actual installation and
+# configuration.
+#
+
+set -eu
+
+# The parameter file
+declare CONFIG_FILE=local.params
+
+# The salt template directory
+declare CONFIG_DIR=local_config_dir
+
+# The 5-character Arvados cluster id
+# This will be populated by loadconfig()
+declare CLUSTER
+
+# The parent domain (not including the cluster id)
+# This will be populated by loadconfig()
+declare DOMAIN
+
+# A bash associative array listing each node and mapping to the roles
+# that should be provisioned on those nodes.
+# This will be populated by loadconfig()
+declare -A NODES
+
+# The ssh user we'll use
+# This will be populated by loadconfig()
+declare DEPLOY_USER
+
+# The git repository that we'll push to on all the nodes
+# This will be populated by loadconfig()
+declare GITTARGET
+
+sync() {
+    local NODE=$1
+    local BRANCH=$2
+
+    # Synchronizes the configuration by creating a git repository on
+    # each node, pushing our branch, and updating the checkout.
+
+    if [[ "$NODE" != localhost ]] ; then
+       if ! ssh $NODE test -d ${GITTARGET}.git ; then
+
+           # Initialize the git repository (1st time case).  We're
+           # actually going to make two repositories here because git
+           # will complain if you try to push to a repository with a
+           # checkout. So we're going to create a "bare" repository
+           # and then clone a regular repository (with a checkout)
+           # from that.
+
+           ssh $NODE git init --bare ${GITTARGET}.git
+           if ! git remote add $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git ; then
+               git remote set-url $NODE $DEPLOY_USER@$NODE:${GITTARGET}.git
+           fi
+           git push $NODE $BRANCH
+           ssh $NODE git clone ${GITTARGET}.git ${GITTARGET}
+       fi
+
+       # The update case.
+       #
+       # Push to the bare repository on the remote node, then in the
+       # remote node repository with the checkout, pull the branch
+       # from the bare repository.
+
+       git push $NODE $BRANCH
+       ssh $NODE "git -C ${GITTARGET} checkout ${BRANCH} && git -C ${GITTARGET} pull"
+    fi
+}
+
+deploynode() {
+    local NODE=$1
+    local ROLES=$2
+
+    # Deploy a node.  This runs the provision script on the node, with
+    # the appropriate roles.
+
+    if [[ -z "$ROLES" ]] ; then
+       echo "No roles declared for '$NODE' in ${CONFIG_FILE}"
+       exit 1
+    fi
+
+    if [[ "$NODE" = localhost ]] ; then
+       sudo ./provision.sh --config ${CONFIG_FILE} --roles ${ROLES}
+    else
+       ssh $DEPLOY_USER@$NODE "cd ${GITTARGET} && sudo ./provision.sh --config ${CONFIG_FILE} --roles ${ROLES}"
+    fi
+}
+
+loadconfig() {
+    if [[ ! -s $CONFIG_FILE ]] ; then
+       echo "Must be run from initialized setup dir, maybe you need to 'initialize' first?"
+    fi
+    source ${CONFIG_FILE}
+    GITTARGET=arvados-deploy-config-${CLUSTER}
+}
+
+subcmd="$1"
+if [[ -n "$subcmd" ]] ; then
+    shift
+fi
+case "$subcmd" in
+    initialize)
+       # Create a new setup directory: a git repository holding the
+       # scripts, the tests, a parameter file and a salt config
+       # directory copied from the chosen templates.
+       if [[ ! -f provision.sh ]] ; then
+           echo "Must be run from arvados/tools/salt-install"
+           # This is an error, so don't report success to the caller.
+           exit 1
+       fi
+
+       # All three arguments are validated below, so missing ones
+       # are read as empty rather than tripping "set -u".
+       SETUPDIR="${1:-}"
+       PARAMS="${2:-}"
+       SLS="${3:-}"
+
+       err=
+       if [[ -z "$PARAMS" || ! -f local.params.example.$PARAMS ]] ; then
+           echo "Not found: local.params.example.$PARAMS"
+           echo "Expected one of multiple_hosts, single_host_multiple_hostnames, single_host_single_hostname"
+           err=1
+       fi
+
+       if [[ -z "$SLS" || ! -d config_examples/$SLS ]] ; then
+           echo "Not found: config_examples/$SLS"
+           echo "Expected one of multi_host/aws, single_host/multiple_hostnames, single_host/single_hostname"
+           err=1
+       fi
+
+       if [[ -z "$SETUPDIR" || -z "$PARAMS" || -z "$SLS" ]]; then
+           echo "installer.sh <setup dir to initialize> <params template> <config template>"
+           err=1
+       fi
+
+       if [[ -n "$err" ]] ; then
+           exit 1
+       fi
+
+       echo "Initializing $SETUPDIR"
+       git init "$SETUPDIR"
+       cp -r *.sh tests "$SETUPDIR"
+
+       cp "local.params.example.$PARAMS" "$SETUPDIR/${CONFIG_FILE}"
+       cp -r "config_examples/$SLS" "$SETUPDIR/${CONFIG_DIR}"
+
+       cd "$SETUPDIR"
+       git add *.sh ${CONFIG_FILE} ${CONFIG_DIR} tests
+       git commit -m"initial commit"
+
+       echo "setup directory initialized, now go to $SETUPDIR, edit '${CONFIG_FILE}' and '${CONFIG_DIR}' as needed, then run 'installer.sh deploy'"
+       ;;
+    deploy)
+       # Commit any pending changes, then push the configuration to
+       # the nodes and provision them.  With no argument every node
+       # in NODES is deployed; with one argument only that node is.
+       NODE="${1:-}"
+
+       loadconfig
+
+       if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
+           echo
+           echo "Some parameters still need to be updated.  Please fix them and then re-run deploy."
+           exit 1
+       fi
+
+       BRANCH=$(git branch --show-current)
+
+       set -x
+
+       git add -A
+       if ! git diff --cached --exit-code ; then
+           git commit -m"prepare for deploy"
+       fi
+
+       if [[ -z "$NODE" ]]; then
+           for NODE in "${!NODES[@]}"
+           do
+               # First, push the git repo to each node.  This also
+               # confirms that we have git and can log into each
+               # node.
+               sync "$NODE" "$BRANCH"
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # Do 'database' role first,
+               if [[ "${NODES[$NODE]}" =~ database ]] ; then
+                   deploynode "$NODE" "${NODES[$NODE]}"
+                   # Quoted so the subscript is never glob-expanded.
+                   unset "NODES[$NODE]"
+               fi
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # then  'api' or 'controller' roles
+               if [[ "${NODES[$NODE]}" =~ (api|controller) ]] ; then
+                   deploynode "$NODE" "${NODES[$NODE]}"
+                   unset "NODES[$NODE]"
+               fi
+           done
+
+           for NODE in "${!NODES[@]}"
+           do
+               # Everything else (we removed the nodes that we
+               # already deployed from the list)
+               deploynode "$NODE" "${NODES[$NODE]}"
+           done
+       else
+           # Just deploy the node that was supplied on the command line.
+           # Its roles come from NODES, as in the all-nodes case; a
+           # node that is not listed there yields empty roles, which
+           # deploynode reports as an error.
+           sync "$NODE" "$BRANCH"
+           deploynode "$NODE" "${NODES[$NODE]:-}"
+       fi
+
+       echo
+       echo "Completed deploy, run 'installer.sh diagnostics' to verify the install"
+
+       ;;
+    diagnostics)
+       # Run "arvados-client diagnostics" against this cluster, using
+       # the cluster id, domain and root token from the parameter file.
+       loadconfig
+
+       # Optional argument; left empty when the caller gave none.
+       declare LOCATION="${1:-}"
+
+       which arvados-client || {
+           echo "arvados-client not found, install 'arvados-client' package with 'apt-get' or 'yum'"
+           exit 1
+       }
+
+       if [[ ! -n "$LOCATION" ]] ; then
+           echo "Need to provide '-internal-client' or '-external-client'"
+           echo
+           echo "-internal-client    You are running this on the same private network as the Arvados cluster (e.g. on one of the Arvados nodes)"
+           echo "-external-client    You are running this outside the private network of the Arvados cluster (e.g. your workstation)"
+           exit 1
+       fi
+
+       export ARVADOS_API_HOST="${CLUSTER}.${DOMAIN}" ARVADOS_API_TOKEN="$SYSTEM_ROOT_TOKEN"
+
+       arvados-client diagnostics $LOCATION
+       ;;
+    *)
+       # No subcommand, or an unrecognized one: list what is available.
+       printf '%s\n' \
+           "Arvados installer" \
+           "" \
+           "initialize   initialize the setup directory for configuration" \
+           "deploy       deploy the configuration from the setup directory" \
+           "diagnostics  check your install using diagnostics"
+       ;;
+esac
index 31a69e9840cdfabbee21609b662816b7df60c362..ade1ad46715fd0440b703aa63b5379ff4cf73ce1 100644 (file)
@@ -8,9 +8,26 @@
 # The Arvados cluster ID, needs to be 5 lowercase alphanumeric characters.
 CLUSTER="cluster_fixme_or_this_wont_work"
 
-# The domainname you want tou give to your cluster's hosts
+# The domain name you want to give to your cluster's hosts
+# the end result hostnames will be $SERVICE.$CLUSTER.$DOMAIN
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh log in for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [controller.${CLUSTER}.${DOMAIN}]=api,controller,websocket,dispatcher,keepbalance
+  [keep0.${CLUSTER}.${DOMAIN}]=keepstore
+  [keep1.${CLUSTER}.${DOMAIN}]=keepstore
+  [keep.${CLUSTER}.${DOMAIN}]=keepproxy,keepweb
+  [workbench.${CLUSTER}.${DOMAIN}]=workbench,workbench2,webshell
+  [shell.${CLUSTER}.${DOMAIN}]=shell
+)
+
 # Host SSL port where you want to point your browser to access Arvados
 # Defaults to 443 for regular runs, and to 8443 when called in Vagrant.
 # You can point it to another port if desired
index 2ce1556511bc7d57ddc5a58f53b5840de7353abf..20f334166e419ee806b608ac37fd3a27b10dca82 100644 (file)
@@ -11,6 +11,17 @@ CLUSTER="cluster_fixme_or_this_wont_work"
 # The domainname you want tou give to your cluster's hosts
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh log in for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+)
+
 # External ports used by the Arvados services
 CONTROLLER_EXT_SSL_PORT=443
 KEEP_EXT_SSL_PORT=25101
index 7add9868d9223f90c53d3ef209aa57d875a7328c..a68450094161accb43ef472def621e15b20b2d79 100644 (file)
@@ -11,6 +11,17 @@ CLUSTER="cluster_fixme_or_this_wont_work"
 # The domainname for your cluster's hosts
 DOMAIN="domain_fixme_or_this_wont_work"
 
+# For multi-node installs, the ssh log in for each node
+# must be root or able to sudo
+DEPLOY_USER=root
+
+# The mapping of nodes to roles
+# installer.sh will log in to each of these nodes and then provision
+# it for the specified roles.
+NODES=(
+  [localhost]=api,controller,websocket,dispatcher,keepbalance,keepstore,keepproxy,keepweb,workbench,workbench2,webshell
+)
+
 # Set this value when installing a cluster in a single host with a single
 # hostname to access all the instances. HOSTNAME_EXT should be set to the
 # external hostname for the instance.
index 3c5fb41e0ffc4cf02469e8ffb6d597aca419ea45..f4660be370990302cadbb9b3e11d8f2a44f5de10 100755 (executable)
@@ -237,6 +237,8 @@ T_DIR="/tmp/cluster_tests"
 
 arguments ${@}
 
+declare -A NODES
+
 if [ -s ${CONFIG_FILE} ]; then
   source ${CONFIG_FILE}
 else
@@ -255,7 +257,7 @@ if [ ! -d ${CONFIG_DIR} ]; then
   exit 1
 fi
 
-if grep -q 'fixme_or_this_wont_work' ${CONFIG_FILE} ; then
+if grep -rni 'fixme' ${CONFIG_FILE} ${CONFIG_DIR} ; then
   echo >&2 "The config file ${CONFIG_FILE} has some parameters that need to be modified."
   echo >&2 "Please, fix them and re-run the provision script."
   exit 1
diff --git a/tools/sync-users/.gitignore b/tools/sync-users/.gitignore
new file mode 100644 (file)
index 0000000..cbbc176
--- /dev/null
@@ -0,0 +1 @@
+sync-users
\ No newline at end of file
diff --git a/tools/sync-users/sync-users.go b/tools/sync-users/sync-users.go
new file mode 100644 (file)
index 0000000..37b94a9
--- /dev/null
@@ -0,0 +1,544 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "bytes"
+       "encoding/csv"
+       "encoding/json"
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "net/url"
+       "os"
+       "regexp"
+       "strconv"
+       "strings"
+
+       "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+var version = "dev"
+
+// resourceList abstracts a list of API resources: it reports how
+// many items it holds and exposes them as untyped values.
+type resourceList interface {
+       // Len returns the number of items in the list.
+       Len() int
+       // GetItems returns the items in the list.
+       GetItems() []interface{}
+}
+
+// UserList wraps arvados.UserList so that it satisfies the
+// resourceList interface.
+type UserList struct {
+       arvados.UserList
+}
+
+// Len reports how many users the list holds.
+func (l UserList) Len() int {
+       return len(l.UserList.Items)
+}
+
+// GetItems returns the users in the list as untyped values, in their
+// original order. The result is nil when the list is empty.
+func (l UserList) GetItems() (out []interface{}) {
+       for i := range l.Items {
+               out = append(out, l.Items[i])
+       }
+       return out
+}
+
+func main() {
+       cfg, err := GetConfig()
+       if err != nil {
+               log.Fatalf("%v", err)
+       }
+
+       if err := doMain(&cfg); err != nil {
+               log.Fatalf("%v", err)
+       }
+}
+
+// ConfigParams holds the settings for one run: the command line
+// options filled in by ParseFlags, plus the API client and cluster
+// details filled in by GetConfig.
+type ConfigParams struct {
+       // CaseInsensitive is the -case-insensitive option; it is
+       // forced to true when UserID is "email".
+       CaseInsensitive    bool
+       // Client is the API client, created from the environment.
+       Client             *arvados.Client
+       // ClusterID is the cluster ID from the exported config.
+       ClusterID          string
+       // CurrentUser is the (admin) user the client is running as.
+       CurrentUser        arvados.User
+       // DeactivateUnlisted is the -deactivate-unlisted option.
+       DeactivateUnlisted bool
+       // Path is the input file path given on the command line.
+       Path               string
+       // UserID is the -user-id option: "email" or "username".
+       UserID             string
+       // SysUserUUID is ClusterID + "-tpzed-000000000000000".
+       SysUserUUID        string
+       // AnonUserUUID is ClusterID + "-tpzed-anonymouspublic".
+       AnonUserUUID       string
+       // Verbose is the -verbose option.
+       Verbose            bool
+}
+
+// ParseFlags parses the command line (os.Args) and stores the
+// resulting options in cfg: CaseInsensitive, DeactivateUnlisted,
+// Path, UserID and Verbose.
+//
+// The process exits directly when flag parsing does not succeed or
+// when -version is given. An error is returned when the input file
+// argument is missing or empty, or when -user-id is not one of the
+// accepted values.
+func ParseFlags(cfg *ConfigParams) error {
+       // Acceptable attributes to identify a user on the CSV file
+       userIDOpts := map[string]bool{
+               "email":    true, // default
+               "username": true,
+       }
+
+       flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+       flags.Usage = func() {
+               usageStr := `Synchronize remote users into Arvados from a CSV format file with 5 columns:
+  * 1st: User Identifier (email or username)
+  * 2nd: First name
+  * 3rd: Last name
+  * 4th: Active status (0 or 1)
+  * 5th: Admin status (0 or 1)`
+               fmt.Fprintf(flags.Output(), "%s\n\n", usageStr)
+               fmt.Fprintf(flags.Output(), "Usage:\n%s [OPTIONS] <input-file.csv>\n\n", os.Args[0])
+               fmt.Fprintf(flags.Output(), "Options:\n")
+               flags.PrintDefaults()
+       }
+
+       caseInsensitive := flags.Bool(
+               "case-insensitive",
+               false,
+               "Performs case insensitive matching on user IDs. Always ON when using 'email' user IDs.")
+       deactivateUnlisted := flags.Bool(
+               "deactivate-unlisted",
+               false,
+               "Deactivate users that are not in the input file.")
+       userID := flags.String(
+               "user-id",
+               "email",
+               "Attribute by which every user is identified. Valid values are: email and username.")
+       verbose := flags.Bool(
+               "verbose",
+               false,
+               "Log informational messages.")
+       getVersion := flags.Bool(
+               "version",
+               false,
+               "Print version information and exit.")
+
+       if ok, code := cmd.ParseFlags(flags, os.Args[0], os.Args[1:], "input-file.csv", os.Stderr); !ok {
+               os.Exit(code)
+       } else if *getVersion {
+               fmt.Printf("%s %s\n", os.Args[0], version)
+               os.Exit(0)
+       }
+
+       // Input file as a required positional argument
+       if flags.NArg() == 0 {
+               return fmt.Errorf("please provide a path to an input file")
+       }
+       // Ask the flag set for the first positional argument.
+       // Indexing os.Args by NFlag() picks the wrong element when a
+       // flag and its value are separate arguments, as in
+       // "-user-id username input.csv".
+       srcPath := flags.Arg(0)
+
+       // Validations
+       if srcPath == "" {
+               return fmt.Errorf("input file path invalid")
+       }
+       if !userIDOpts[*userID] {
+               var options []string
+               for opt := range userIDOpts {
+                       options = append(options, opt)
+               }
+               return fmt.Errorf("user ID must be one of: %s", strings.Join(options, ", "))
+       }
+       if *userID == "email" {
+               // Always do case-insensitive email addresses matching
+               *caseInsensitive = true
+       }
+
+       cfg.CaseInsensitive = *caseInsensitive
+       cfg.DeactivateUnlisted = *deactivateUnlisted
+       cfg.Path = srcPath
+       cfg.UserID = *userID
+       cfg.Verbose = *verbose
+
+       return nil
+}
+
+// GetConfig builds the run configuration: it parses the command
+// line, creates an API client from the environment, verifies that the
+// current user is an admin, and fetches the cluster's exported config
+// to learn the cluster ID and to refuse to run on a cluster other
+// than the login cluster.
+func GetConfig() (cfg ConfigParams, err error) {
+       if err = ParseFlags(&cfg); err != nil {
+               return
+       }
+
+       cfg.Client = arvados.NewClientFromEnv()
+
+       // Check current user permissions
+       user, err := cfg.Client.CurrentUser()
+       switch {
+       case err != nil:
+               return cfg, fmt.Errorf("error getting the current user: %s", err)
+       case !user.IsAdmin:
+               return cfg, fmt.Errorf("current user %q is not an admin user", user.UUID)
+       }
+       if cfg.Verbose {
+               log.Printf("Running as admin user %q (%s)", user.Email, user.UUID)
+       }
+       cfg.CurrentUser = user
+
+       // Only the parts of the exported config that are used here.
+       var exported struct {
+               ClusterID string
+               Login     struct {
+                       LoginCluster string
+               }
+       }
+       if err = cfg.Client.RequestAndDecode(&exported, "GET", "arvados/v1/config", nil, nil); err != nil {
+               return cfg, fmt.Errorf("error getting the exported config: %s", err)
+       }
+       if lc := exported.Login.LoginCluster; lc != "" && lc != exported.ClusterID {
+               return cfg, fmt.Errorf("cannot run on a cluster other than the login cluster")
+       }
+       cfg.ClusterID = exported.ClusterID
+       cfg.SysUserUUID = exported.ClusterID + "-tpzed-000000000000000"
+       cfg.AnonUserUUID = exported.ClusterID + "-tpzed-anonymouspublic"
+
+       return cfg, nil
+}
+
+// GetUserID returns the correct user id value depending on the selector
+func GetUserID(u arvados.User, idSelector string) (string, error) {
+       switch idSelector {
+       case "email":
+               return u.Email, nil
+       case "username":
+               return u.Username, nil
+       default:
+               return "", fmt.Errorf("cannot identify user by %q selector", idSelector)
+       }
+}
+
+// doMain runs one synchronization pass. It indexes the cluster's local
+// users by the configured identifier (email or username), applies every
+// record of the input file through ProcessRecord and, when
+// cfg.DeactivateUnlisted is set, deactivates active users that were not
+// listed in the file.
+//
+// Failures on individual records are logged and counted, not returned.
+// An error is returned when the input file cannot be opened or parsed,
+// when users cannot be listed or indexed, and when duplicated emails
+// were found among the local users.
+func doMain(cfg *ConfigParams) error {
+       // Try opening the input file early, just in case there's a problem.
+       f, err := os.Open(cfg.Path)
+       if err != nil {
+               return fmt.Errorf("error opening input file: %s", err)
+       }
+       defer f.Close()
+
+       iCaseLog := ""
+       if cfg.UserID == "username" && cfg.CaseInsensitive {
+               iCaseLog = " - username matching requested to be case-insensitive"
+       }
+       log.Printf("%s %s started. Using %q as users id%s", os.Args[0], version, cfg.UserID, iCaseLog)
+
+       allUsers := make(map[string]arvados.User) // Index by UUID
+       userIDToUUID := make(map[string]string) // Index by email or username
+       dupedEmails := make(map[string][]arvados.User)
+       processedUsers := make(map[string]bool) // UUID -> listed in the input file
+       results, err := GetAll(cfg.Client, "users", arvados.ResourceListParams{}, &UserList{})
+       if err != nil {
+               return fmt.Errorf("error getting all users: %s", err)
+       }
+       log.Printf("Found %d users in cluster %q", len(results), cfg.ClusterID)
+       // Only UUIDs carrying this cluster's ID as prefix are considered local.
+       localUserUuidRegex := regexp.MustCompile(fmt.Sprintf("^%s-tpzed-[0-9a-z]{15}$", cfg.ClusterID))
+       for _, item := range results {
+               u := item.(arvados.User)
+
+               // Remote user check
+               if !localUserUuidRegex.MatchString(u.UUID) {
+                       if cfg.Verbose {
+                               log.Printf("Remote user %q (%s) won't be considered for processing", u.Email, u.UUID)
+                       }
+                       continue
+               }
+
+               // Duplicated user id check
+               uID, err := GetUserID(u, cfg.UserID)
+               if err != nil {
+                       return err
+               }
+               if uID == "" {
+                       return fmt.Errorf("%s is empty for user with uuid %q", cfg.UserID, u.UUID)
+               }
+               if cfg.CaseInsensitive {
+                       uID = strings.ToLower(uID)
+               }
+               if alreadySeenUUID, found := userIDToUUID[uID]; found {
+                       // A repeated username aborts the run; a repeated email only
+                       // excludes the affected users and is reported at the end.
+                       if cfg.UserID == "username" && uID != "" {
+                               return fmt.Errorf("case insensitive collision for username %q between %q and %q", uID, u.UUID, alreadySeenUUID)
+                       } else if cfg.UserID == "email" && uID != "" {
+                               log.Printf("Duplicated email %q found in user %s - ignoring", uID, u.UUID)
+                               if len(dupedEmails[uID]) == 0 {
+                                       dupedEmails[uID] = []arvados.User{allUsers[alreadySeenUUID]}
+                               }
+                               dupedEmails[uID] = append(dupedEmails[uID], u)
+                               delete(allUsers, alreadySeenUUID) // Skip even the first occurrence,
+                               // for security purposes.
+                               continue
+                       }
+               }
+               if cfg.Verbose {
+                       log.Printf("Seen user %q (%s)", uID, u.UUID)
+               }
+               userIDToUUID[uID] = u.UUID
+               allUsers[u.UUID] = u
+               processedUsers[u.UUID] = false
+       }
+
+       loadedRecords, err := LoadInputFile(f)
+       if err != nil {
+               return fmt.Errorf("reading input file %q: %s", cfg.Path, err)
+       }
+       log.Printf("Loaded %d records from input file", len(loadedRecords))
+
+       // Outcome sets, keyed by user UUID; only their sizes are reported.
+       updatesSucceeded := map[string]bool{}
+       updatesFailed := map[string]bool{}
+       updatesSkipped := map[string]bool{}
+
+       for _, record := range loadedRecords {
+               if cfg.CaseInsensitive {
+                       record.UserID = strings.ToLower(record.UserID)
+               }
+               // NOTE(review): recordUUID is "" when record.UserID is not in
+               // userIDToUUID (e.g. a user that does not exist yet), so the
+               // maps below are keyed by the empty string in that case.
+               recordUUID := userIDToUUID[record.UserID]
+               processedUsers[recordUUID] = true
+               if cfg.UserID == "email" && record.UserID == cfg.CurrentUser.Email {
+                       updatesSkipped[recordUUID] = true
+                       log.Printf("Skipping current user %q (%s) from processing", record.UserID, cfg.CurrentUser.UUID)
+                       continue
+               }
+               if updated, err := ProcessRecord(cfg, record, userIDToUUID, allUsers); err != nil {
+                       log.Printf("error processing record %q: %s", record.UserID, err)
+                       updatesFailed[recordUUID] = true
+               } else if updated {
+                       updatesSucceeded[recordUUID] = true
+               }
+       }
+
+       if cfg.DeactivateUnlisted {
+               // Deactivate every still-active user that no record referred to,
+               // except the ones shouldSkip protects.
+               for userUUID, user := range allUsers {
+                       if shouldSkip(cfg, user) {
+                               updatesSkipped[userUUID] = true
+                               log.Printf("Skipping unlisted user %q (%s) from deactivating", user.Email, user.UUID)
+                               continue
+                       }
+                       if !processedUsers[userUUID] && allUsers[userUUID].IsActive {
+                               if cfg.Verbose {
+                                       log.Printf("Deactivating unlisted user %q (%s)", user.Username, user.UUID)
+                               }
+                               var updatedUser arvados.User
+                               if err := UnsetupUser(cfg.Client, user.UUID, &updatedUser); err != nil {
+                                       log.Printf("error deactivating unlisted user %q: %s", user.UUID, err)
+                                       updatesFailed[userUUID] = true
+                               } else {
+                                       allUsers[userUUID] = updatedUser
+                                       updatesSucceeded[userUUID] = true
+                               }
+                       }
+               }
+       }
+
+       log.Printf("User update successes: %d, skips: %d, failures: %d", len(updatesSucceeded), len(updatesSkipped), len(updatesFailed))
+
+       // Report duplicated emails detection
+       if len(dupedEmails) > 0 {
+               emails := make([]string, len(dupedEmails))
+               i := 0
+               for e := range dupedEmails {
+                       emails[i] = e
+                       i++
+               }
+               return fmt.Errorf("skipped %d duplicated email address(es) in the cluster's local user list: %v", len(dupedEmails), emails)
+       }
+
+       return nil
+}
+
+func shouldSkip(cfg *ConfigParams, user arvados.User) bool {
+       switch user.UUID {
+       case cfg.SysUserUUID, cfg.AnonUserUUID:
+               return true
+       case cfg.CurrentUser.UUID:
+               return true
+       }
+       return false
+}
+
+// userRecord is one row of the input file: the user identifier (an
+// email or a username, depending on the configured selector), the
+// user's names, and the wanted active and admin status.
+type userRecord struct {
+       UserID    string
+       FirstName string
+       LastName  string
+       Active    bool
+       Admin     bool
+}
+
+func needsUpdating(user arvados.User, record userRecord) bool {
+       userData := userRecord{"", user.FirstName, user.LastName, user.IsActive, user.IsAdmin}
+       recordData := userRecord{"", record.FirstName, record.LastName, record.Active, record.Admin}
+       return userData != recordData
+}
+
+// ProcessRecord creates or updates a user based on the given record
+func ProcessRecord(cfg *ConfigParams, record userRecord, userIDToUUID map[string]string, allUsers map[string]arvados.User) (bool, error) {
+       if cfg.Verbose {
+               log.Printf("Processing record for user %q", record.UserID)
+       }
+
+       wantedActiveStatus := strconv.FormatBool(record.Active)
+       wantedAdminStatus := strconv.FormatBool(record.Active && record.Admin)
+       createRequired := false
+       updateRequired := false
+       // Check if user exists, set its active & admin status.
+       var user arvados.User
+       recordUUID := userIDToUUID[record.UserID]
+       user, found := allUsers[recordUUID]
+       if !found {
+               if cfg.Verbose {
+                       log.Printf("User %q does not exist, creating", record.UserID)
+               }
+               createRequired = true
+               err := CreateUser(cfg.Client, &user, map[string]string{
+                       cfg.UserID:   record.UserID,
+                       "first_name": record.FirstName,
+                       "last_name":  record.LastName,
+                       "is_active":  wantedActiveStatus,
+                       "is_admin":   wantedAdminStatus,
+               })
+               if err != nil {
+                       return false, fmt.Errorf("error creating user %q: %s", record.UserID, err)
+               }
+       } else if needsUpdating(user, record) {
+               updateRequired = true
+               if record.Active {
+                       if !user.IsActive && cfg.Verbose {
+                               log.Printf("User %q (%s) is inactive, activating", record.UserID, user.UUID)
+                       }
+                       // Here we assume the 'setup' is done elsewhere if needed.
+                       err := UpdateUser(cfg.Client, user.UUID, &user, map[string]string{
+                               "first_name": record.FirstName,
+                               "last_name":  record.LastName,
+                               "is_active":  wantedActiveStatus,
+                               "is_admin":   wantedAdminStatus,
+                       })
+                       if err != nil {
+                               return false, fmt.Errorf("error updating user %q: %s", record.UserID, err)
+                       }
+               } else {
+                       fnChanged := user.FirstName != record.FirstName
+                       lnChanged := user.LastName != record.LastName
+                       if fnChanged || lnChanged {
+                               err := UpdateUser(cfg.Client, user.UUID, &user, map[string]string{
+                                       "first_name": record.FirstName,
+                                       "last_name":  record.LastName,
+                               })
+                               if err != nil {
+                                       return false, fmt.Errorf("error updating user %q: %s", record.UserID, err)
+                               }
+                       }
+                       if user.IsActive {
+                               if cfg.Verbose {
+                                       log.Printf("User %q is active, deactivating", record.UserID)
+                               }
+                               err := UnsetupUser(cfg.Client, user.UUID, &user)
+                               if err != nil {
+                                       return false, fmt.Errorf("error deactivating user %q: %s", record.UserID, err)
+                               }
+                       }
+               }
+       }
+       allUsers[record.UserID] = user
+       if createRequired {
+               log.Printf("Created user %q", record.UserID)
+       }
+       if updateRequired {
+               log.Printf("Updated user %q", record.UserID)
+       }
+
+       return createRequired || updateRequired, nil
+}
+
+// LoadInputFile reads the input file and returns a list of user records
+func LoadInputFile(f *os.File) (loadedRecords []userRecord, err error) {
+       lineNo := 0
+       csvReader := csv.NewReader(f)
+       loadedRecords = make([]userRecord, 0)
+
+       for {
+               record, e := csvReader.Read()
+               if e == io.EOF {
+                       break
+               }
+               lineNo++
+               if e != nil {
+                       err = fmt.Errorf("parsing error at line %d: %s", lineNo, e)
+                       return
+               }
+               if len(record) != 5 {
+                       err = fmt.Errorf("parsing error at line %d: expected 5 fields, found %d", lineNo, len(record))
+                       return
+               }
+               userID := strings.ToLower(strings.TrimSpace(record[0]))
+               firstName := strings.TrimSpace(record[1])
+               lastName := strings.TrimSpace(record[2])
+               active := strings.TrimSpace(record[3])
+               admin := strings.TrimSpace(record[4])
+               if userID == "" || firstName == "" || lastName == "" || active == "" || admin == "" {
+                       err = fmt.Errorf("parsing error at line %d: fields cannot be empty", lineNo)
+                       return
+               }
+               activeBool, err := strconv.ParseBool(active)
+               if err != nil {
+                       return nil, fmt.Errorf("parsing error at line %d: active status not recognized", lineNo)
+               }
+               adminBool, err := strconv.ParseBool(admin)
+               if err != nil {
+                       return nil, fmt.Errorf("parsing error at line %d: admin status not recognized", lineNo)
+               }
+               loadedRecords = append(loadedRecords, userRecord{
+                       UserID:    userID,
+                       FirstName: firstName,
+                       LastName:  lastName,
+                       Active:    activeBool,
+                       Admin:     adminBool,
+               })
+       }
+       return loadedRecords, nil
+}
+
+// GetAll adds all objects of type 'resource' to the 'allItems' list
+func GetAll(c *arvados.Client, res string, params arvados.ResourceListParams, page resourceList) (allItems []interface{}, err error) {
+       // Use the maximum page size the server allows
+       limit := 1<<31 - 1
+       params.Limit = &limit
+       params.Offset = 0
+       params.Order = "uuid"
+       for {
+               if err = GetResourceList(c, &page, res, params); err != nil {
+                       return allItems, err
+               }
+               // Have we finished paging?
+               if page.Len() == 0 {
+                       break
+               }
+               allItems = append(allItems, page.GetItems()...)
+               params.Offset += page.Len()
+       }
+       return allItems, nil
+}
+
+func jsonReader(rscName string, ob interface{}) io.Reader {
+       j, err := json.Marshal(ob)
+       if err != nil {
+               panic(err)
+       }
+       v := url.Values{}
+       v[rscName] = []string{string(j)}
+       return bytes.NewBufferString(v.Encode())
+}
+
+// GetResourceList fetches res list using params
+func GetResourceList(c *arvados.Client, dst *resourceList, res string, params interface{}) error {
+       return c.RequestAndDecode(dst, "GET", "/arvados/v1/"+res, nil, params)
+}
+
+// CreateUser creates a user with userData parameters, assigns it to dst
+func CreateUser(c *arvados.Client, dst *arvados.User, userData map[string]string) error {
+       return c.RequestAndDecode(dst, "POST", "/arvados/v1/users", jsonReader("user", userData), nil)
+}
+
+// UpdateUser updates a user with userData parameters
+func UpdateUser(c *arvados.Client, userUUID string, dst *arvados.User, userData map[string]string) error {
+       return c.RequestAndDecode(&dst, "PUT", "/arvados/v1/users/"+userUUID, jsonReader("user", userData), nil)
+}
+
+// UnsetupUser deactivates a user
+func UnsetupUser(c *arvados.Client, userUUID string, dst *arvados.User) error {
+       return c.RequestAndDecode(&dst, "POST", "/arvados/v1/users/"+userUUID+"/unsetup", nil, nil)
+}
diff --git a/tools/sync-users/sync-users_test.go b/tools/sync-users/sync-users_test.go
new file mode 100644 (file)
index 0000000..8b5385a
--- /dev/null
@@ -0,0 +1,436 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "fmt"
+       "io/ioutil"
+       "os"
+       "regexp"
+       "strings"
+       "testing"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       . "gopkg.in/check.v1"
+)
+
+// Test hands control over to gocheck (gocheck boilerplate).
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+// TestSuite holds the state that SetUpTest prepares for every test.
+type TestSuite struct {
+       cfg   *ConfigParams           // config returned by GetConfig
+       ac    *arvados.Client         // client created from the environment
+       users map[string]arvados.User // users listed at setup time, by UUID
+}
+
+func (s *TestSuite) SetUpTest(c *C) {
+       s.ac = arvados.NewClientFromEnv()
+       u, err := s.ac.CurrentUser()
+       c.Assert(err, IsNil)
+       c.Assert(u.IsAdmin, Equals, true)
+
+       s.users = make(map[string]arvados.User)
+       ul := arvados.UserList{}
+       s.ac.RequestAndDecode(&ul, "GET", "/arvados/v1/users", nil, arvados.ResourceListParams{})
+       c.Assert(ul.ItemsAvailable, Not(Equals), 0)
+       s.users = make(map[string]arvados.User)
+       for _, u := range ul.Items {
+               s.users[u.UUID] = u
+       }
+
+       // Set up command config
+       os.Args = []string{"cmd", "somefile.csv"}
+       config, err := GetConfig()
+       c.Assert(err, IsNil)
+       s.cfg = &config
+}
+
+// TearDownTest puts the database back into its fixture state once a
+// test is done.
+func (s *TestSuite) TearDownTest(c *C) {
+       var discarded interface{}
+       resetErr := s.cfg.Client.RequestAndDecode(&discarded, "POST", "/database/reset", nil, nil)
+       c.Assert(resetErr, IsNil)
+}
+
+var _ = Suite(&TestSuite{})
+
+// MakeTempCSVFile creates a temp file with data as comma separated values
+func MakeTempCSVFile(data [][]string) (f *os.File, err error) {
+       f, err = ioutil.TempFile("", "test_sync_users")
+       if err != nil {
+               return
+       }
+       for _, line := range data {
+               fmt.Fprintf(f, "%s\n", strings.Join(line, ","))
+       }
+       err = f.Close()
+       return
+}
+
+// RecordsToStrings formats the input data suitable for MakeTempCSVFile
+func RecordsToStrings(records []userRecord) [][]string {
+       data := [][]string{}
+       for _, u := range records {
+               data = append(data, []string{
+                       u.UserID,
+                       u.FirstName,
+                       u.LastName,
+                       fmt.Sprintf("%t", u.Active),
+                       fmt.Sprintf("%t", u.Admin)})
+       }
+       return data
+}
+
+func ListUsers(ac *arvados.Client) ([]arvados.User, error) {
+       var ul arvados.UserList
+       err := ac.RequestAndDecode(&ul, "GET", "/arvados/v1/users", nil, arvados.ResourceListParams{})
+       if err != nil {
+               return nil, err
+       }
+       return ul.Items, nil
+}
+
+// ParseFlags must fail when no input file path is given.
+func (s *TestSuite) TestParseFlagsWithoutPositionalArgument(c *C) {
+       os.Args = []string{"cmd", "-verbose"}
+       err := ParseFlags(&ConfigParams{})
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*please provide a path to an input file.*")
+}
+
+// ParseFlags must reject a -user-id value it does not know about.
+func (s *TestSuite) TestParseFlagsWrongUserID(c *C) {
+       os.Args = []string{"cmd", "-user-id=nickname", "/tmp/somefile.csv"}
+       err := ParseFlags(&ConfigParams{})
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*user ID must be one of:.*")
+}
+
+// With only the input path given, the defaults apply: email as user id,
+// which also turns case-insensitive matching on.
+func (s *TestSuite) TestParseFlagsWithPositionalArgument(c *C) {
+       cfg := ConfigParams{}
+       os.Args = []string{"cmd", "/tmp/somefile.csv"}
+       err := ParseFlags(&cfg)
+       c.Assert(err, IsNil)
+       c.Assert(cfg.Path, Equals, "/tmp/somefile.csv")
+       c.Assert(cfg.Verbose, Equals, false)
+       c.Assert(cfg.DeactivateUnlisted, Equals, false)
+       c.Assert(cfg.UserID, Equals, "email")
+       c.Assert(cfg.CaseInsensitive, Equals, true)
+}
+
+// Optional flags must end up in the config; with username as user id,
+// matching stays case-sensitive unless requested otherwise.
+func (s *TestSuite) TestParseFlagsWithOptionalFlags(c *C) {
+       cfg := ConfigParams{}
+       os.Args = []string{"cmd", "-verbose", "-deactivate-unlisted", "-user-id=username", "/tmp/somefile.csv"}
+       err := ParseFlags(&cfg)
+       c.Assert(err, IsNil)
+       c.Assert(cfg.Path, Equals, "/tmp/somefile.csv")
+       c.Assert(cfg.Verbose, Equals, true)
+       c.Assert(cfg.DeactivateUnlisted, Equals, true)
+       c.Assert(cfg.UserID, Equals, "username")
+       c.Assert(cfg.CaseInsensitive, Equals, false)
+}
+
+func (s *TestSuite) TestGetConfig(c *C) {
+       os.Args = []string{"cmd", "/tmp/somefile.csv"}
+       cfg, err := GetConfig()
+       c.Assert(err, IsNil)
+       c.Assert(cfg.AnonUserUUID, Not(Equals), "")
+       c.Assert(cfg.SysUserUUID, Not(Equals), "")
+       c.Assert(cfg.CurrentUser, Not(Equals), "")
+       c.Assert(cfg.ClusterID, Not(Equals), "")
+       c.Assert(cfg.Client, NotNil)
+}
+
+// A record with any of its 5 fields empty must make doMain fail. Each
+// record goes into its own input file, one empty field at a time.
+func (s *TestSuite) TestFailOnEmptyFields(c *C) {
+       records := [][]string{
+               {"", "first-name", "last-name", "1", "0"},
+               {"user@example", "", "last-name", "1", "0"},
+               {"user@example", "first-name", "", "1", "0"},
+               {"user@example", "first-name", "last-name", "", "0"},
+               {"user@example", "first-name", "last-name", "1", ""},
+       }
+       for _, record := range records {
+               data := [][]string{record}
+               tmpfile, err := MakeTempCSVFile(data)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*fields cannot be empty.*")
+       }
+}
+
+// Leading and trailing whitespace around CSV fields must not end up in
+// the users that get created.
+func (s *TestSuite) TestIgnoreSpaces(c *C) {
+       // Make sure users aren't already there from fixtures
+       for _, user := range s.users {
+               e := user.Email
+               found := e == "user1@example.com" || e == "user2@example.com" || e == "user3@example.com"
+               c.Assert(found, Equals, false)
+       }
+       // Use CSV data with leading/trailing whitespaces, confirm that they get ignored
+       data := [][]string{
+               {" user1@example.com", "  Example", "   User1", "1", "0"},
+               {"user2@example.com ", "Example  ", "User2   ", "1", "0"},
+               {" user3@example.com ", "  Example  ", "   User3   ", "1", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+       users, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       // Every one of the 3 users must exist with its trimmed values.
+       for _, userNr := range []int{1, 2, 3} {
+               found := false
+               for _, user := range users {
+                       if user.Email == fmt.Sprintf("user%d@example.com", userNr) &&
+                               user.LastName == fmt.Sprintf("User%d", userNr) &&
+                               user.FirstName == "Example" && user.IsActive == true {
+                               found = true
+                               break
+                       }
+               }
+               c.Assert(found, Equals, true)
+       }
+}
+
+// Error out when records don't have exactly 5 fields (one case with 4
+// fields, one with 6).
+func (s *TestSuite) TestWrongNumberOfFields(c *C) {
+       for _, testCase := range [][][]string{
+               {{"user1@example.com", "Example", "User1", "1"}},
+               {{"user1@example.com", "Example", "User1", "1", "0", "extra data"}},
+       } {
+               tmpfile, err := MakeTempCSVFile(testCase)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*expected 5 fields, found.*")
+       }
+}
+
+// Error out when records have incorrect data types
+func (s *TestSuite) TestWrongDataFields(c *C) {
+       for _, testCase := range [][][]string{
+               {{"user1@example.com", "Example", "User1", "yep", "0"}},
+               {{"user1@example.com", "Example", "User1", "1", "nope"}},
+       } {
+               tmpfile, err := MakeTempCSVFile(testCase)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*parsing error at line.*[active|admin] status not recognized.*")
+       }
+}
+
+// Create, activate and deactivate users.
+//
+// The whole scenario runs twice, once identifying users by email and
+// once by username. Four users get created from a first input file and
+// then updated from a second one that flips their active status and
+// changes their names. Admin status is expected only on users that are
+// also active.
+func (s *TestSuite) TestUserCreationAndUpdate(c *C) {
+       for _, tc := range []string{"email", "username"} {
+               uIDPrefix := tc
+               uIDSuffix := ""
+               if tc == "email" {
+                       uIDSuffix = "@example.com"
+               }
+               s.cfg.UserID = tc
+               records := []userRecord{{
+                       UserID:    fmt.Sprintf("%suser1%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "User1",
+                       Active:    true,
+                       Admin:     false,
+               }, {
+                       UserID:    fmt.Sprintf("%suser2%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "User2",
+                       Active:    false, // initially inactive
+                       Admin:     false,
+               }, {
+                       UserID:    fmt.Sprintf("%sadmin1%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "Admin1",
+                       Active:    true,
+                       Admin:     true,
+               }, {
+                       UserID:    fmt.Sprintf("%sadmin2%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "Admin2",
+                       Active:    false, // initially inactive
+                       Admin:     true,
+               }}
+               // Make sure users aren't already there from fixtures
+               for _, user := range s.users {
+                       uID, err := GetUserID(user, s.cfg.UserID)
+                       c.Assert(err, IsNil)
+                       found := false
+                       for _, r := range records {
+                               if uID == r.UserID {
+                                       found = true
+                                       break
+                               }
+                       }
+                       c.Assert(found, Equals, false)
+               }
+               // User creation
+               tmpfile, err := MakeTempCSVFile(RecordsToStrings(records))
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, IsNil)
+
+               users, err := ListUsers(s.cfg.Client)
+               c.Assert(err, IsNil)
+               for _, r := range records {
+                       var foundUser arvados.User
+                       for _, user := range users {
+                               uID, err := GetUserID(user, s.cfg.UserID)
+                               c.Assert(err, IsNil)
+                               if uID == r.UserID {
+                                       // Add an @example.com email if missing
+                                       // (to avoid database reset errors)
+                                       if tc == "username" && user.Email == "" {
+                                               err := UpdateUser(s.cfg.Client, user.UUID, &user, map[string]string{
+                                                       "email": fmt.Sprintf("%s@example.com", user.Username),
+                                               })
+                                               c.Assert(err, IsNil)
+                                       }
+                                       foundUser = user
+                                       break
+                               }
+                       }
+                       c.Logf("Checking creation for user %q", r.UserID)
+                       // foundUser is a struct value, so a NotNil check could
+                       // never fail; an empty UUID means it wasn't found.
+                       c.Assert(foundUser.UUID, Not(Equals), "")
+                       c.Assert(foundUser.FirstName, Equals, r.FirstName)
+                       c.Assert(foundUser.LastName, Equals, r.LastName)
+                       c.Assert(foundUser.IsActive, Equals, r.Active)
+                       c.Assert(foundUser.IsAdmin, Equals, (r.Active && r.Admin))
+               }
+               // User update
+               for idx := range records {
+                       records[idx].Active = !records[idx].Active
+                       records[idx].FirstName = records[idx].FirstName + "Updated"
+                       records[idx].LastName = records[idx].LastName + "Updated"
+               }
+               tmpfile, err = MakeTempCSVFile(RecordsToStrings(records))
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, IsNil)
+
+               users, err = ListUsers(s.cfg.Client)
+               c.Assert(err, IsNil)
+               for _, r := range records {
+                       var foundUser arvados.User
+                       for _, user := range users {
+                               uID, err := GetUserID(user, s.cfg.UserID)
+                               c.Assert(err, IsNil)
+                               if uID == r.UserID {
+                                       foundUser = user
+                                       break
+                               }
+                       }
+                       c.Logf("Checking update for user %q", r.UserID)
+                       c.Assert(foundUser.UUID, Not(Equals), "")
+                       c.Assert(foundUser.FirstName, Equals, r.FirstName)
+                       c.Assert(foundUser.LastName, Equals, r.LastName)
+                       c.Assert(foundUser.IsActive, Equals, r.Active)
+                       c.Assert(foundUser.IsAdmin, Equals, (r.Active && r.Admin))
+               }
+       }
+}
+
+// With DeactivateUnlisted set, running doMain with an input file that
+// lists none of the existing users must leave only 3 active local
+// users: the system user, the anonymous user and the current user.
+func (s *TestSuite) TestDeactivateUnlisted(c *C) {
+       localUserUuidRegex := regexp.MustCompile(fmt.Sprintf("^%s-tpzed-[0-9a-z]{15}$", s.cfg.ClusterID))
+       users, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       previouslyActiveUsers := 0
+       for _, u := range users {
+               if u.UUID == fmt.Sprintf("%s-tpzed-anonymouspublic", s.cfg.ClusterID) && !u.IsActive {
+                       // Make sure the anonymous user is active for this test
+                       var au arvados.User
+                       err := UpdateUser(s.cfg.Client, u.UUID, &au, map[string]string{"is_active": "true"})
+                       c.Assert(err, IsNil)
+                       c.Assert(au.IsActive, Equals, true)
+               }
+               if localUserUuidRegex.MatchString(u.UUID) && u.IsActive {
+                       previouslyActiveUsers++
+               }
+       }
+       // At least 3 active users: System root, Anonymous and the current user.
+       // Other active users should exist from fixture.
+       c.Logf("Initial active users count: %d", previouslyActiveUsers)
+       c.Assert(previouslyActiveUsers > 3, Equals, true)
+
+       s.cfg.DeactivateUnlisted = true
+       s.cfg.Verbose = true
+       // The only record is an inactive user, so no listed user stays active.
+       data := [][]string{
+               {"user1@example.com", "Example", "User1", "0", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+
+       users, err = ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       currentlyActiveUsers := 0
+       acceptableActiveUUIDs := map[string]bool{
+               fmt.Sprintf("%s-tpzed-000000000000000", s.cfg.ClusterID): true,
+               fmt.Sprintf("%s-tpzed-anonymouspublic", s.cfg.ClusterID): true,
+               s.cfg.CurrentUser.UUID: true,
+       }
+       remainingActiveUUIDs := map[string]bool{}
+       seenUserEmails := map[string]bool{}
+       for _, u := range users {
+               if _, ok := seenUserEmails[u.Email]; ok {
+                       c.Errorf("Duplicated email address %q in user list (probably from fixtures). This test requires unique email addresses.", u.Email)
+               }
+               seenUserEmails[u.Email] = true
+               if localUserUuidRegex.MatchString(u.UUID) && u.IsActive {
+                       c.Logf("Found remaining active user %q (%s)", u.Email, u.UUID)
+                       _, ok := acceptableActiveUUIDs[u.UUID]
+                       c.Assert(ok, Equals, true)
+                       remainingActiveUUIDs[u.UUID] = true
+                       currentlyActiveUsers++
+               }
+       }
+       // 3 active users remaining: System root, Anonymous and the current user.
+       c.Logf("Active local users remaining: %v", remainingActiveUUIDs)
+       c.Assert(currentlyActiveUsers, Equals, 3)
+}
+
+func (s *TestSuite) TestFailOnDuplicatedEmails(c *C) {
+       for i := range []int{1, 2} {
+               isAdmin := i == 2
+               err := CreateUser(s.cfg.Client, &arvados.User{}, map[string]string{
+                       "email":      "somedupedemail@example.com",
+                       "first_name": fmt.Sprintf("Duped %d", i),
+                       "username":   fmt.Sprintf("dupedemail%d", i),
+                       "last_name":  "User",
+                       "is_active":  "true",
+                       "is_admin":   fmt.Sprintf("%t", isAdmin),
+               })
+               c.Assert(err, IsNil)
+       }
+       s.cfg.Verbose = true
+       data := [][]string{
+               {"user1@example.com", "Example", "User1", "0", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, "skipped.*duplicated email address.*")
+}