Merge branch '16888-ctr-fed-token'
authorTom Clegg <tom@curii.com>
Wed, 6 Jul 2022 15:53:08 +0000 (11:53 -0400)
committerTom Clegg <tom@curii.com>
Wed, 6 Jul 2022 15:53:08 +0000 (11:53 -0400)
refs #16888

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

apps/workbench/app/views/users/_show_admin.html.erb
build/run-build-packages-one-target.sh
build/run-build-packages.sh
doc/_config.yml
doc/user/topics/arvados-sync-external-sources.html.textile.liquid [moved from doc/user/topics/arvados-sync-groups.html.textile.liquid with 51% similarity]
services/api/app/controllers/database_controller.rb
services/api/test/fixtures/users.yml
tools/sync-users/.gitignore [new file with mode: 0644]
tools/sync-users/sync-users.go [new file with mode: 0644]
tools/sync-users/sync-users_test.go [new file with mode: 0644]

index 1da22d438fabe1609cf09857d17ec0b6bd3c9a52..b151ceff042567e6020bdcd2a827203772be8b6d 100644 (file)
@@ -6,7 +6,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
   <div class="col-md-6">
 
     <p>
-      This page enables you to <a href="https://doc.arvados.org/master/admin/user-management.html">manage users</a>.
+      This page enables you to <a href="https://doc.arvados.org/main/admin/user-management.html">manage users</a>.
     </p>
 
     <p>
@@ -22,7 +22,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
       As an admin, you can deactivate and reset this user. This will
       remove all repository/VM permissions for the user. If you
       "setup" the user again, the user will have to sign the user
-      agreement again.  You may also want to <a href="https://doc.arvados.org/master/admin/reassign-ownership.html">reassign data ownership</a>.
+      agreement again.  You may also want to <a href="https://doc.arvados.org/main/admin/reassign-ownership.html">reassign data ownership</a>.
     </p>
 
     <%= button_to "Deactivate #{@object.full_name}", unsetup_user_url(id: @object.uuid), class: 'btn btn-primary', data: {confirm: "Are you sure you want to deactivate #{@object.full_name}?"} %>
index 41b480e697b74c008add6ea1020716db12f29c6f..7d9b5b6a37abb14185a693ff331859137d7f4082 100755 (executable)
@@ -215,6 +215,7 @@ if test -z "$packages" ; then
         arvados-server
         arvados-src
         arvados-sync-groups
+        arvados-sync-users
         arvados-workbench
         arvados-workbench2
         arvados-ws
index 3e1ed6a94de866c9feda7861b08318af8ff61b9d..d4240d4f26b9120c3477aff6460a66aa3c169955 100755 (executable)
@@ -268,6 +268,8 @@ package_go_binary cmd/arvados-server arvados-ws "$FORMAT" "$ARCH" \
     "Arvados Websocket server"
 package_go_binary tools/sync-groups arvados-sync-groups "$FORMAT" "$ARCH" \
     "Synchronize remote groups into Arvados from an external source"
+package_go_binary tools/sync-users arvados-sync-users "$FORMAT" "$ARCH" \
+    "Synchronize remote users into Arvados from an external source"
 package_go_binary tools/keep-block-check keep-block-check "$FORMAT" "$ARCH" \
     "Verify that all data from one set of Keep servers to another was copied"
 package_go_binary tools/keep-rsync keep-rsync "$FORMAT" "$ARCH" \
index 7c5e6d986e49fbc3aa8f42b8ffd6945c186fc94b..d2bb7e797582a8c2a98c850face5442b9e07bfdb 100644 (file)
@@ -177,7 +177,7 @@ navbar:
       - admin/federation.html.textile.liquid
       - admin/merge-remote-account.html.textile.liquid
       - admin/migrating-providers.html.textile.liquid
-      - user/topics/arvados-sync-groups.html.textile.liquid
+      - user/topics/arvados-sync-external-sources.html.textile.liquid
       - admin/scoped-tokens.html.textile.liquid
       - admin/token-expiration-policy.html.textile.liquid
       - admin/user-activity.html.textile.liquid
similarity index 51%
rename from doc/user/topics/arvados-sync-groups.html.textile.liquid
rename to doc/user/topics/arvados-sync-external-sources.html.textile.liquid
index 1f7eede4bb14650a862e1276cf1b8bccbc05e429..0ec0098f053aa0b4d53c5a133bc00c1ed2325f58 100644 (file)
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: admin
-title: "Synchronizing external groups"
+title: "Synchronizing from external sources"
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,7 +9,51 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-The @arvados-sync-groups@ tool allows to synchronize groups in Arvados from an external source.
+The @arvados-sync-users@ and @arvados-sync-groups@ tools allow you to manage Arvados users and groups from external sources.
+
+These tools are designed to be run periodically, reading a file created by a dump script of a remote auth system (e.g. LDAP) and applying the contents of that file as the source of truth.
+
+bq. NOTE: Both tools need to perform several administrative tasks on Arvados, so must be run using a superuser token via @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables or @~/.config/arvados/settings.conf@ file.
+
+h1. Using arvados-sync-users
+
+This tool reads a CSV (comma-separated values) file having information about user accounts and their expected state on Arvados.
+
+Every line on the file should have 5 fields:
+
+# A user identifier: it could be an email address (default) or a username.
+# The user's first name.
+# The user's last name.
+# The intended user's active state.
+# The intended user's admin state: will always be read as @false@ when @active=false@.
+
+The last 2 fields should be represented as @true@/@false@, @yes@/@no@, or @1@/@0@ values.
+
+h2. Options
+
+The following command line options are supported:
+
+table(table table-bordered table-condensed).
+|_. Option |_. Description |
+|==--help==|This list of options|
+|==--case-insensitive==|Uses case-insensitive username matching|
+|==--deactivate-unlisted==|Deactivate users that aren't listed on the input file. (Current & system users won't be affected)|
+|==--user-id==|Identifier to use in looking up user. One of 'email' or 'username' (Default: 'email')|
+|==--verbose==|Log informational messages|
+|==--version==|Print version and exit|
+
+The tool will create users when needed, and update those existing records to match the desired state described by the fields on the CSV file.
+System users such as root and anonymous are unaffected by this tool.
+In the case of a @LoginCluster@ federation, this tool should be run on the cluster that manages the user accounts, and will fail otherwise.
+
+h2. Example
+
+To sync users using the username to identify every account, reading from some @external_users.csv@ file and deactivating existing users that aren't included in it, the command should be called as follows:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arvados-sync-users --deactivate-unlisted --user-id username /path/to/external_users.csv </span>
+</code></pre>
+</notextile>
 
 h1. Using arvados-sync-groups
 
@@ -21,11 +65,6 @@ Users can be identified by their email address or username: the tool will check
 
 Permission level can be one of the following: @can_read@, @can_write@ or @can_manage@, giving the group member read, read/write or managing privileges on the group. For backwards compatibility purposes, if any record omits the third (permission) field, it will default to @can_write@ permission. You can read more about permissions on the "group management admin guide":{{ site.baseurl }}/admin/group-management.html.
 
-This tool is designed to be run periodically reading a file created by a remote auth system (ie: LDAP) dump script, applying what's included on the file as the source of truth.
-
-
-bq. NOTE: @arvados-sync-groups@ needs to perform several administrative tasks on Arvados, so must be run using a superuser token
-
 h2. Options
 
 The following command line options are supported:
index fa1e1ca43c64dc0b98a0587e703f0a075e890dae..69453959d262a792b7f09edca6b6557e8a5d8a4b 100644 (file)
@@ -25,7 +25,7 @@ class DatabaseController < ApplicationController
     unexpected_uuids = user_uuids - fixture_uuids
     if unexpected_uuids.any?
       logger.error("Running in test environment, but non-fixture users exist: " +
-                   "#{unexpected_uuids}")
+                   "#{unexpected_uuids}" + "\nMaybe test users without @example.com email addresses were created?")
       raise ArvadosModel::PermissionDeniedError
     end
 
index 14630d9efa85615a09585082299290b71def8530..1d9bcbb040ab7c5dd955361704da7b9a81a3147b 100644 (file)
@@ -12,6 +12,7 @@ system_user:
   modified_by_user_uuid: zzzzz-tpzed-000000000000000
   modified_at: 2014-11-27 06:38:21.208036000 Z
   email: root
+  username: root
   first_name: root
   last_name: ''
   identity_url:
@@ -171,7 +172,7 @@ spectator:
 container_runtime_token_user:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-l3skomkti0c4vg4
-  email: spectator@arvados.local
+  email: container_runtime_token_user@arvados.local
   first_name: Spect
   last_name: Ator
   identity_url: https://container_runtime_token_user.openid.local
@@ -193,6 +194,7 @@ inactive_uninvited:
   identity_url: https://inactive-uninvited-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveuninvited
   prefs: {}
 
 inactive:
@@ -216,6 +218,7 @@ inactive_but_signed_user_agreement:
   identity_url: https://inactive-but-agreeable-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveusersignedua
   prefs:
     profile:
       organization: example.com
@@ -230,6 +233,7 @@ anonymous:
   last_name: anonymouspublic
   is_active: false
   is_admin: false
+  username: anonymous
   prefs: {}
 
 job_reader:
@@ -273,17 +277,19 @@ active_no_prefs:
   identity_url: https://active_no_prefs.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefs
   prefs: {}
 
 active_no_prefs_profile_no_getting_started_shown:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-a46c98d1td4aoj4
-  email: active_no_prefs_profile@arvados.local
+  email: active_no_prefs_profile_no_gs@arvados.local
   first_name: HasPrefs
   last_name: NoProfile
   identity_url: https://active_no_prefs_profile.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefsprofilenogs
   prefs:
     test: abc
 
@@ -296,6 +302,7 @@ active_no_prefs_profile_with_getting_started_shown:
   identity_url: https://active_no_prefs_profile_seen_gs.openid.local
   is_active: true
   is_admin: false
+  username: activenoprefsprofile
   prefs:
     test: abc
     getting_started_shown: 2015-03-26 12:34:56.789000000 Z
@@ -308,6 +315,7 @@ active_with_prefs_profile_no_getting_started_shown:
   last_name: NoGettingStartedShown
   identity_url: https://active_nogettinstarted.openid.local
   is_active: true
+  username: activenogettinstarted
   prefs:
     profile:
       organization: example.com
@@ -372,7 +380,7 @@ fuse:
 permission_perftest:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-permissionptest
-  email: fuse@arvados.local
+  email: permission_perftest@arvados.local
   first_name: FUSE
   last_name: User
   identity_url: https://permission_perftest.openid.local
@@ -431,4 +439,4 @@ has_can_login_permission:
   is_active: true
   is_admin: false
   modified_at: 2015-03-26 12:34:56.789000000 Z
-  username: can-login-user
+  username: canLoginUser
diff --git a/tools/sync-users/.gitignore b/tools/sync-users/.gitignore
new file mode 100644 (file)
index 0000000..cbbc176
--- /dev/null
@@ -0,0 +1 @@
+sync-users
\ No newline at end of file
diff --git a/tools/sync-users/sync-users.go b/tools/sync-users/sync-users.go
new file mode 100644 (file)
index 0000000..37b94a9
--- /dev/null
@@ -0,0 +1,544 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "bytes"
+       "encoding/csv"
+       "encoding/json"
+       "flag"
+       "fmt"
+       "io"
+       "log"
+       "net/url"
+       "os"
+       "regexp"
+       "strconv"
+       "strings"
+
+       "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+var version = "dev"
+
+// resourceList is the view of a paged list response that GetAll relies
+// on: the number of items in the page and the items themselves.
+type resourceList interface {
+       Len() int
+       GetItems() []interface{}
+}
+
+// UserList embeds arvados.UserList and adds the Len and GetItems
+// methods so that it implements the resourceList interface.
+type UserList struct {
+       arvados.UserList
+}
+
+// Len reports how many users this list currently holds.
+func (l UserList) Len() int {
+       count := len(l.Items)
+       return count
+}
+
+// GetItems returns every user held by this list, each one boxed as an
+// interface{} value. The result is nil when the list is empty.
+func (l UserList) GetItems() (out []interface{}) {
+       for i := range l.Items {
+               out = append(out, l.Items[i])
+       }
+       return out
+}
+
+// main builds the run configuration and performs the synchronization.
+// Any error from either step is logged and terminates the process.
+func main() {
+       cfg, err := GetConfig()
+       if err == nil {
+               err = doMain(&cfg)
+       }
+       if err != nil {
+               log.Fatalf("%v", err)
+       }
+}
+
+// ConfigParams holds everything a synchronization run needs: the
+// command line settings filled in by ParseFlags and the cluster
+// details gathered by GetConfig.
+type ConfigParams struct {
+       // Match user IDs case-insensitively. ParseFlags forces this on
+       // when UserID is "email".
+       CaseInsensitive    bool
+       // API client, built from the environment by GetConfig.
+       Client             *arvados.Client
+       // Cluster ID as reported by the cluster's exported config.
+       ClusterID          string
+       // The (admin) user the client is authenticated as.
+       CurrentUser        arvados.User
+       // Deactivate local users that are missing from the input file.
+       DeactivateUnlisted bool
+       // Path to the input CSV file.
+       Path               string
+       // User attribute used as identifier: "email" or "username".
+       UserID             string
+       // UUIDs of the cluster's system and anonymous users, derived
+       // from ClusterID by GetConfig.
+       SysUserUUID        string
+       AnonUserUUID       string
+       // Log informational messages.
+       Verbose            bool
+}
+
+// ParseFlags parses the command line found in os.Args and stores the
+// resulting settings in cfg.
+//
+// It terminates the process when flag parsing fails or when -version is
+// requested. It returns an error when the input file argument is
+// missing or empty, or when -user-id is not a supported attribute.
+func ParseFlags(cfg *ConfigParams) error {
+       // Acceptable attributes to identify a user on the CSV file. This is
+       // an ordered list so the error message below is deterministic.
+       userIDOpts := []string{
+               "email", // default
+               "username",
+       }
+
+       flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+       flags.Usage = func() {
+               usageStr := `Synchronize remote users into Arvados from a CSV format file with 5 columns:
+  * 1st: User Identifier (email or username)
+  * 2nd: First name
+  * 3rd: Last name
+  * 4th: Active status (0 or 1)
+  * 5th: Admin status (0 or 1)`
+               fmt.Fprintf(flags.Output(), "%s\n\n", usageStr)
+               fmt.Fprintf(flags.Output(), "Usage:\n%s [OPTIONS] <input-file.csv>\n\n", os.Args[0])
+               fmt.Fprintf(flags.Output(), "Options:\n")
+               flags.PrintDefaults()
+       }
+
+       caseInsensitive := flags.Bool(
+               "case-insensitive",
+               false,
+               "Performs case insensitive matching on user IDs. Always ON when using 'email' user IDs.")
+       deactivateUnlisted := flags.Bool(
+               "deactivate-unlisted",
+               false,
+               "Deactivate users that are not in the input file.")
+       userID := flags.String(
+               "user-id",
+               "email",
+               "Attribute by which every user is identified. Valid values are: email and username.")
+       verbose := flags.Bool(
+               "verbose",
+               false,
+               "Log informational messages.")
+       getVersion := flags.Bool(
+               "version",
+               false,
+               "Print version information and exit.")
+
+       if ok, code := cmd.ParseFlags(flags, os.Args[0], os.Args[1:], "input-file.csv", os.Stderr); !ok {
+               os.Exit(code)
+       } else if *getVersion {
+               fmt.Printf("%s %s\n", os.Args[0], version)
+               os.Exit(0)
+       }
+
+       // Input file as a required positional argument
+       if flags.NArg() == 0 {
+               return fmt.Errorf("please provide a path to an input file")
+       }
+       // Ask the flag set for the first positional argument. Indexing
+       // os.Args by NFlag() picks the wrong element whenever a flag is
+       // given with a separate value argument (e.g. "--user-id username").
+       srcPath := flags.Arg(0)
+
+       // Validations
+       if srcPath == "" {
+               return fmt.Errorf("input file path invalid")
+       }
+       validUserID := false
+       for _, opt := range userIDOpts {
+               if opt == *userID {
+                       validUserID = true
+                       break
+               }
+       }
+       if !validUserID {
+               return fmt.Errorf("user ID must be one of: %s", strings.Join(userIDOpts, ", "))
+       }
+       if *userID == "email" {
+               // Always do case-insensitive email addresses matching
+               *caseInsensitive = true
+       }
+
+       cfg.CaseInsensitive = *caseInsensitive
+       cfg.DeactivateUnlisted = *deactivateUnlisted
+       cfg.Path = srcPath
+       cfg.UserID = *userID
+       cfg.Verbose = *verbose
+
+       return nil
+}
+
+// GetConfig builds the configuration for a run. It parses the command
+// line, creates an API client from the environment, checks that the
+// client's user is an admin, and reads the cluster's exported config to
+// learn the cluster ID. It fails when the cluster delegates logins to a
+// different cluster.
+func GetConfig() (cfg ConfigParams, err error) {
+       if err = ParseFlags(&cfg); err != nil {
+               return
+       }
+
+       cfg.Client = arvados.NewClientFromEnv()
+
+       // Check current user permissions
+       currentUser, err := cfg.Client.CurrentUser()
+       if err != nil {
+               return cfg, fmt.Errorf("error getting the current user: %s", err)
+       }
+       if !currentUser.IsAdmin {
+               return cfg, fmt.Errorf("current user %q is not an admin user", currentUser.UUID)
+       }
+       if cfg.Verbose {
+               log.Printf("Running as admin user %q (%s)", currentUser.Email, currentUser.UUID)
+       }
+       cfg.CurrentUser = currentUser
+
+       // Only the fields needed here are decoded from the exported config.
+       var exported struct {
+               ClusterID string
+               Login     struct {
+                       LoginCluster string
+               }
+       }
+       if err = cfg.Client.RequestAndDecode(&exported, "GET", "arvados/v1/config", nil, nil); err != nil {
+               return cfg, fmt.Errorf("error getting the exported config: %s", err)
+       }
+       loginCluster := exported.Login.LoginCluster
+       if loginCluster != "" && loginCluster != exported.ClusterID {
+               return cfg, fmt.Errorf("cannot run on a cluster other than the login cluster")
+       }
+       cfg.ClusterID = exported.ClusterID
+       cfg.SysUserUUID = exported.ClusterID + "-tpzed-000000000000000"
+       cfg.AnonUserUUID = exported.ClusterID + "-tpzed-anonymouspublic"
+
+       return cfg, nil
+}
+
+// GetUserID picks the attribute of u named by idSelector: the email
+// address for "email" or the username for "username". Any other
+// selector yields an error.
+func GetUserID(u arvados.User, idSelector string) (string, error) {
+       if idSelector == "email" {
+               return u.Email, nil
+       }
+       if idSelector == "username" {
+               return u.Username, nil
+       }
+       return "", fmt.Errorf("cannot identify user by %q selector", idSelector)
+}
+
+// doMain performs one synchronization run:
+//
+//  1. List all users and index the local ones (UUID belongs to this
+//     cluster) by the configured user ID attribute.
+//  2. Load the input file and create/update a user for every record.
+//  3. When cfg.DeactivateUnlisted is set, deactivate the active local
+//     users that no record referred to.
+//
+// Failures on individual records are logged and counted but don't stop
+// the run. An error is returned when the input file or the user list
+// cannot be read, when a user has an empty or colliding ID, or (after
+// the run completes) when duplicated email addresses were found.
+func doMain(cfg *ConfigParams) error {
+       // Try opening the input file early, just in case there's a problem.
+       f, err := os.Open(cfg.Path)
+       if err != nil {
+               return fmt.Errorf("error opening input file: %s", err)
+       }
+       defer f.Close()
+
+       iCaseLog := ""
+       if cfg.UserID == "username" && cfg.CaseInsensitive {
+               iCaseLog = " - username matching requested to be case-insensitive"
+       }
+       log.Printf("%s %s started. Using %q as users id%s", os.Args[0], version, cfg.UserID, iCaseLog)
+
+       allUsers := make(map[string]arvados.User) // Index by UUID
+       userIDToUUID := make(map[string]string)   // Index by email or username
+       dupedEmails := make(map[string][]arvados.User)
+       // processedUsers has one entry per local user found in the listing
+       // below, keyed by UUID. The value becomes true once an input record
+       // refers to that user.
+       processedUsers := make(map[string]bool)
+       results, err := GetAll(cfg.Client, "users", arvados.ResourceListParams{}, &UserList{})
+       if err != nil {
+               return fmt.Errorf("error getting all users: %s", err)
+       }
+       log.Printf("Found %d users in cluster %q", len(results), cfg.ClusterID)
+       localUserUuidRegex := regexp.MustCompile(fmt.Sprintf("^%s-tpzed-[0-9a-z]{15}$", cfg.ClusterID))
+       for _, item := range results {
+               u := item.(arvados.User)
+
+               // Remote user check
+               if !localUserUuidRegex.MatchString(u.UUID) {
+                       if cfg.Verbose {
+                               log.Printf("Remote user %q (%s) won't be considered for processing", u.Email, u.UUID)
+                       }
+                       continue
+               }
+
+               // Duplicated user id check
+               uID, err := GetUserID(u, cfg.UserID)
+               if err != nil {
+                       return err
+               }
+               if uID == "" {
+                       return fmt.Errorf("%s is empty for user with uuid %q", cfg.UserID, u.UUID)
+               }
+               if cfg.CaseInsensitive {
+                       uID = strings.ToLower(uID)
+               }
+               if alreadySeenUUID, found := userIDToUUID[uID]; found {
+                       if cfg.UserID == "username" && uID != "" {
+                               return fmt.Errorf("case insensitive collision for username %q between %q and %q", uID, u.UUID, alreadySeenUUID)
+                       } else if cfg.UserID == "email" && uID != "" {
+                               log.Printf("Duplicated email %q found in user %s - ignoring", uID, u.UUID)
+                               if len(dupedEmails[uID]) == 0 {
+                                       dupedEmails[uID] = []arvados.User{allUsers[alreadySeenUUID]}
+                               }
+                               dupedEmails[uID] = append(dupedEmails[uID], u)
+                               delete(allUsers, alreadySeenUUID) // Skip even the first occurrence,
+                               // for security purposes.
+                               continue
+                       }
+               }
+               if cfg.Verbose {
+                       log.Printf("Seen user %q (%s)", uID, u.UUID)
+               }
+               userIDToUUID[uID] = u.UUID
+               allUsers[u.UUID] = u
+               processedUsers[u.UUID] = false
+       }
+
+       loadedRecords, err := LoadInputFile(f)
+       if err != nil {
+               return fmt.Errorf("reading input file %q: %s", cfg.Path, err)
+       }
+       log.Printf("Loaded %d records from input file", len(loadedRecords))
+
+       updatesSucceeded := map[string]bool{}
+       updatesFailed := map[string]bool{}
+       updatesSkipped := map[string]bool{}
+
+       for _, record := range loadedRecords {
+               if cfg.CaseInsensitive {
+                       record.UserID = strings.ToLower(record.UserID)
+               }
+               recordUUID := userIDToUUID[record.UserID]
+               // Records that don't match an existing user have no UUID yet.
+               // Count them under their own ID so they don't all collapse
+               // into a single "" entry in the stats.
+               statKey := recordUUID
+               if statKey == "" {
+                       statKey = record.UserID
+               } else {
+                       processedUsers[recordUUID] = true
+               }
+               // The record ID may have been lowercased above, so compare
+               // it to the current user's email ignoring case.
+               if cfg.UserID == "email" && strings.EqualFold(record.UserID, cfg.CurrentUser.Email) {
+                       updatesSkipped[statKey] = true
+                       log.Printf("Skipping current user %q (%s) from processing", record.UserID, cfg.CurrentUser.UUID)
+                       continue
+               }
+               if updated, err := ProcessRecord(cfg, record, userIDToUUID, allUsers); err != nil {
+                       log.Printf("error processing record %q: %s", record.UserID, err)
+                       updatesFailed[statKey] = true
+               } else if updated {
+                       updatesSucceeded[statKey] = true
+               }
+       }
+
+       if cfg.DeactivateUnlisted {
+               // Walk the users found in the initial listing (the keys of
+               // processedUsers) instead of ranging over allUsers: record
+               // processing may have added entries to allUsers, and those
+               // must never be treated as unlisted users.
+               for userUUID, listed := range processedUsers {
+                       user, found := allUsers[userUUID]
+                       if !found {
+                               // Removed from allUsers because of a duplicated email.
+                               continue
+                       }
+                       if shouldSkip(cfg, user) {
+                               updatesSkipped[userUUID] = true
+                               log.Printf("Skipping unlisted user %q (%s) from deactivating", user.Email, user.UUID)
+                               continue
+                       }
+                       if listed || !user.IsActive {
+                               continue
+                       }
+                       if cfg.Verbose {
+                               log.Printf("Deactivating unlisted user %q (%s)", user.Username, user.UUID)
+                       }
+                       var updatedUser arvados.User
+                       if err := UnsetupUser(cfg.Client, user.UUID, &updatedUser); err != nil {
+                               log.Printf("error deactivating unlisted user %q: %s", user.UUID, err)
+                               updatesFailed[userUUID] = true
+                       } else {
+                               allUsers[userUUID] = updatedUser
+                               updatesSucceeded[userUUID] = true
+                       }
+               }
+       }
+
+       log.Printf("User update successes: %d, skips: %d, failures: %d", len(updatesSucceeded), len(updatesSkipped), len(updatesFailed))
+
+       // Report duplicated emails detection
+       if len(dupedEmails) > 0 {
+               emails := make([]string, len(dupedEmails))
+               i := 0
+               for e := range dupedEmails {
+                       emails[i] = e
+                       i++
+               }
+               return fmt.Errorf("skipped %d duplicated email address(es) in the cluster's local user list: %v", len(dupedEmails), emails)
+       }
+
+       return nil
+}
+
+// shouldSkip reports whether user is one of the accounts that must be
+// left untouched: the system user, the anonymous user, or the user
+// this tool is running as.
+func shouldSkip(cfg *ConfigParams, user arvados.User) bool {
+       uuid := user.UUID
+       return uuid == cfg.SysUserUUID ||
+               uuid == cfg.AnonUserUUID ||
+               uuid == cfg.CurrentUser.UUID
+}
+
+// userRecord is one row of the input file: the user's identifier
+// (email or username), names, and the wanted active/admin state.
+type userRecord struct {
+       UserID    string
+       FirstName string
+       LastName  string
+       Active    bool
+       Admin     bool
+}
+
+// needsUpdating reports whether user differs from the state described
+// by record in first name, last name, active status or admin status.
+//
+// The wanted admin status is "active AND admin", the same value
+// ProcessRecord sends to the API. Comparing against the raw Admin field
+// would flag an inactive user as outdated on every run whenever its
+// record says admin=true.
+func needsUpdating(user arvados.User, record userRecord) bool {
+       userData := userRecord{"", user.FirstName, user.LastName, user.IsActive, user.IsAdmin}
+       recordData := userRecord{"", record.FirstName, record.LastName, record.Active, record.Active && record.Admin}
+       return userData != recordData
+}
+
+// ProcessRecord brings one user in line with the given input record.
+//
+// The user is looked up through userIDToUUID (record ID -> UUID) and
+// allUsers (UUID -> user). When there is no such user, one is created.
+// When the user exists but differs from the record, it is updated; a
+// record marked inactive deactivates an active user. Both maps are
+// kept up to date with the outcome.
+//
+// It returns true when a user was created or needed updating, and an
+// error when any API request fails.
+func ProcessRecord(cfg *ConfigParams, record userRecord, userIDToUUID map[string]string, allUsers map[string]arvados.User) (bool, error) {
+       if cfg.Verbose {
+               log.Printf("Processing record for user %q", record.UserID)
+       }
+
+       wantedActiveStatus := strconv.FormatBool(record.Active)
+       // An inactive user is never an admin, whatever the record says.
+       wantedAdminStatus := strconv.FormatBool(record.Active && record.Admin)
+       createRequired := false
+       updateRequired := false
+       // Check if user exists, set its active & admin status.
+       recordUUID := userIDToUUID[record.UserID]
+       user, found := allUsers[recordUUID]
+       if !found {
+               if cfg.Verbose {
+                       log.Printf("User %q does not exist, creating", record.UserID)
+               }
+               createRequired = true
+               err := CreateUser(cfg.Client, &user, map[string]string{
+                       cfg.UserID:   record.UserID,
+                       "first_name": record.FirstName,
+                       "last_name":  record.LastName,
+                       "is_active":  wantedActiveStatus,
+                       "is_admin":   wantedAdminStatus,
+               })
+               if err != nil {
+                       return false, fmt.Errorf("error creating user %q: %s", record.UserID, err)
+               }
+               // Make the new user reachable by its ID, so a repeated
+               // record doesn't try to create it again.
+               userIDToUUID[record.UserID] = user.UUID
+       } else if needsUpdating(user, record) {
+               updateRequired = true
+               if record.Active {
+                       if !user.IsActive && cfg.Verbose {
+                               log.Printf("User %q (%s) is inactive, activating", record.UserID, user.UUID)
+                       }
+                       // Here we assume the 'setup' is done elsewhere if needed.
+                       err := UpdateUser(cfg.Client, user.UUID, &user, map[string]string{
+                               "first_name": record.FirstName,
+                               "last_name":  record.LastName,
+                               "is_active":  wantedActiveStatus,
+                               "is_admin":   wantedAdminStatus,
+                       })
+                       if err != nil {
+                               return false, fmt.Errorf("error updating user %q: %s", record.UserID, err)
+                       }
+               } else {
+                       fnChanged := user.FirstName != record.FirstName
+                       lnChanged := user.LastName != record.LastName
+                       if fnChanged || lnChanged {
+                               err := UpdateUser(cfg.Client, user.UUID, &user, map[string]string{
+                                       "first_name": record.FirstName,
+                                       "last_name":  record.LastName,
+                               })
+                               if err != nil {
+                                       return false, fmt.Errorf("error updating user %q: %s", record.UserID, err)
+                               }
+                       }
+                       if user.IsActive {
+                               if cfg.Verbose {
+                                       log.Printf("User %q is active, deactivating", record.UserID)
+                               }
+                               err := UnsetupUser(cfg.Client, user.UUID, &user)
+                               if err != nil {
+                                       return false, fmt.Errorf("error deactivating user %q: %s", record.UserID, err)
+                               }
+                       }
+               }
+       }
+       // allUsers is indexed by UUID. Storing the user under the record's
+       // email/username would add a stray entry and leave the real one
+       // stale.
+       allUsers[user.UUID] = user
+       if createRequired {
+               log.Printf("Created user %q", record.UserID)
+       }
+       if updateRequired {
+               log.Printf("Updated user %q", record.UserID)
+       }
+
+       return createRequired || updateRequired, nil
+}
+
+// LoadInputFile reads CSV records from f and returns them as user
+// records.
+//
+// Every record must have exactly 5 non-empty fields: user ID, first
+// name, last name, active status and admin status. Surrounding
+// whitespace is trimmed from every field. The user ID keeps its
+// original letter case; case folding, when wanted, is up to the caller.
+// The two status fields accept "yes"/"no" (in any letter case) as well
+// as anything strconv.ParseBool understands ("1", "0", "true",
+// "false", ...).
+//
+// On the first invalid record it returns a nil slice and an error that
+// includes the record's position.
+func LoadInputFile(f *os.File) (loadedRecords []userRecord, err error) {
+       lineNo := 0
+       csvReader := csv.NewReader(f)
+       loadedRecords = make([]userRecord, 0)
+
+       for {
+               record, e := csvReader.Read()
+               if e == io.EOF {
+                       break
+               }
+               lineNo++
+               if e != nil {
+                       return nil, fmt.Errorf("parsing error at line %d: %s", lineNo, e)
+               }
+               if len(record) != 5 {
+                       return nil, fmt.Errorf("parsing error at line %d: expected 5 fields, found %d", lineNo, len(record))
+               }
+               userID := strings.TrimSpace(record[0])
+               firstName := strings.TrimSpace(record[1])
+               lastName := strings.TrimSpace(record[2])
+               active := strings.TrimSpace(record[3])
+               admin := strings.TrimSpace(record[4])
+               if userID == "" || firstName == "" || lastName == "" || active == "" || admin == "" {
+                       return nil, fmt.Errorf("parsing error at line %d: fields cannot be empty", lineNo)
+               }
+               activeBool, ok := parseBoolField(active)
+               if !ok {
+                       return nil, fmt.Errorf("parsing error at line %d: active status not recognized", lineNo)
+               }
+               adminBool, ok := parseBoolField(admin)
+               if !ok {
+                       return nil, fmt.Errorf("parsing error at line %d: admin status not recognized", lineNo)
+               }
+               loadedRecords = append(loadedRecords, userRecord{
+                       UserID:    userID,
+                       FirstName: firstName,
+                       LastName:  lastName,
+                       Active:    activeBool,
+                       Admin:     adminBool,
+               })
+       }
+       return loadedRecords, nil
+}
+
+// parseBoolField converts a status field of the input file to a bool.
+// The second result is false when the value isn't recognized.
+func parseBoolField(value string) (bool, bool) {
+       switch strings.ToLower(value) {
+       case "yes":
+               return true, true
+       case "no":
+               return false, true
+       }
+       parsed, err := strconv.ParseBool(value)
+       return parsed, err == nil
+}
+
+// GetAll fetches every object of type 'res', one page after another,
+// and returns them all. 'page' is the value each response is decoded
+// into. The limit, offset and order of 'params' are overwritten.
+// Paging stops at the first empty page; on a request error the items
+// collected so far are returned together with the error.
+func GetAll(c *arvados.Client, res string, params arvados.ResourceListParams, page resourceList) (allItems []interface{}, err error) {
+       // Use the maximum page size the server allows
+       limit := 1<<31 - 1
+       params.Limit = &limit
+       params.Offset = 0
+       params.Order = "uuid"
+       for {
+               err = GetResourceList(c, &page, res, params)
+               if err != nil {
+                       return allItems, err
+               }
+               pageLen := page.Len()
+               // Have we finished paging?
+               if pageLen == 0 {
+                       return allItems, nil
+               }
+               allItems = append(allItems, page.GetItems()...)
+               params.Offset += pageLen
+       }
+}
+
+// jsonReader serializes ob as JSON and returns a reader over the
+// URL-encoded form "rscName=<json>", suitable as a request body.
+// It panics when ob cannot be marshaled.
+func jsonReader(rscName string, ob interface{}) io.Reader {
+       encoded, err := json.Marshal(ob)
+       if err != nil {
+               panic(err)
+       }
+       form := url.Values{}
+       form.Set(rscName, string(encoded))
+       return bytes.NewBufferString(form.Encode())
+}
+
+// GetResourceList requests the list of 'res' objects, filtered and
+// paged according to params, and decodes the response into dst.
+func GetResourceList(c *arvados.Client, dst *resourceList, res string, params interface{}) error {
+       endpoint := "/arvados/v1/" + res
+       return c.RequestAndDecode(dst, "GET", endpoint, nil, params)
+}
+
+// CreateUser requests a new user with the attributes in userData and
+// decodes the server's response into dst.
+func CreateUser(c *arvados.Client, dst *arvados.User, userData map[string]string) error {
+       body := jsonReader("user", userData)
+       return c.RequestAndDecode(dst, "POST", "/arvados/v1/users", body, nil)
+}
+
+// UpdateUser sets the attributes in userData on the user identified by
+// userUUID and decodes the server's response into dst.
+func UpdateUser(c *arvados.Client, userUUID string, dst *arvados.User, userData map[string]string) error {
+       endpoint := "/arvados/v1/users/" + userUUID
+       body := jsonReader("user", userData)
+       return c.RequestAndDecode(&dst, "PUT", endpoint, body, nil)
+}
+
+// UnsetupUser calls the "unsetup" action on the user identified by
+// userUUID, which deactivates it, and decodes the server's response
+// into dst.
+func UnsetupUser(c *arvados.Client, userUUID string, dst *arvados.User) error {
+       endpoint := "/arvados/v1/users/" + userUUID + "/unsetup"
+       return c.RequestAndDecode(&dst, "POST", endpoint, nil, nil)
+}
diff --git a/tools/sync-users/sync-users_test.go b/tools/sync-users/sync-users_test.go
new file mode 100644 (file)
index 0000000..8b5385a
--- /dev/null
@@ -0,0 +1,436 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "fmt"
+       "io/ioutil"
+       "os"
+       "regexp"
+       "strings"
+       "testing"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       . "gopkg.in/check.v1"
+)
+
+// Test is the "go test" entry point for this package. Gocheck
+// boilerplate: it hands t to gocheck's TestingT.
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+// TestSuite carries the state shared by the test methods in this file.
+type TestSuite struct {
+       // cfg is the command configuration; SetUpTest fills it in from GetConfig.
+       cfg   *ConfigParams
+       // ac is the client SetUpTest builds with arvados.NewClientFromEnv.
+       ac    *arvados.Client
+       // users holds the users listed by SetUpTest, keyed by UUID.
+       users map[string]arvados.User
+}
+
+func (s *TestSuite) SetUpTest(c *C) {
+       s.ac = arvados.NewClientFromEnv()
+       u, err := s.ac.CurrentUser()
+       c.Assert(err, IsNil)
+       c.Assert(u.IsAdmin, Equals, true)
+
+       s.users = make(map[string]arvados.User)
+       ul := arvados.UserList{}
+       s.ac.RequestAndDecode(&ul, "GET", "/arvados/v1/users", nil, arvados.ResourceListParams{})
+       c.Assert(ul.ItemsAvailable, Not(Equals), 0)
+       s.users = make(map[string]arvados.User)
+       for _, u := range ul.Items {
+               s.users[u.UUID] = u
+       }
+
+       // Set up command config
+       os.Args = []string{"cmd", "somefile.csv"}
+       config, err := GetConfig()
+       c.Assert(err, IsNil)
+       s.cfg = &config
+}
+
+// TearDownTest runs after each test and asks the API server, through
+// the configured client, to reset its database to fixture state.
+func (s *TestSuite) TearDownTest(c *C) {
+       // The response is decoded into a throwaway value; only the error matters.
+       var discard interface{}
+       resetErr := s.cfg.Client.RequestAndDecode(&discard, "POST", "/database/reset", nil, nil)
+       c.Assert(resetErr, IsNil)
+}
+
+// Register TestSuite with gocheck; the blank identifier discards Suite's result.
+var _ = Suite(&TestSuite{})
+
+// MakeTempCSVFile creates a temp file with data as comma separated values
+func MakeTempCSVFile(data [][]string) (f *os.File, err error) {
+       f, err = ioutil.TempFile("", "test_sync_users")
+       if err != nil {
+               return
+       }
+       for _, line := range data {
+               fmt.Fprintf(f, "%s\n", strings.Join(line, ","))
+       }
+       err = f.Close()
+       return
+}
+
+// RecordsToStrings formats the input data suitable for MakeTempCSVFile
+func RecordsToStrings(records []userRecord) [][]string {
+       data := [][]string{}
+       for _, u := range records {
+               data = append(data, []string{
+                       u.UserID,
+                       u.FirstName,
+                       u.LastName,
+                       fmt.Sprintf("%t", u.Active),
+                       fmt.Sprintf("%t", u.Admin)})
+       }
+       return data
+}
+
+func ListUsers(ac *arvados.Client) ([]arvados.User, error) {
+       var ul arvados.UserList
+       err := ac.RequestAndDecode(&ul, "GET", "/arvados/v1/users", nil, arvados.ResourceListParams{})
+       if err != nil {
+               return nil, err
+       }
+       return ul.Items, nil
+}
+
+func (s *TestSuite) TestParseFlagsWithoutPositionalArgument(c *C) {
+       os.Args = []string{"cmd", "-verbose"}
+       err := ParseFlags(&ConfigParams{})
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*please provide a path to an input file.*")
+}
+
+func (s *TestSuite) TestParseFlagsWrongUserID(c *C) {
+       os.Args = []string{"cmd", "-user-id=nickname", "/tmp/somefile.csv"}
+       err := ParseFlags(&ConfigParams{})
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, ".*user ID must be one of:.*")
+}
+
+// With only the input path given, ParseFlags must store the path and
+// leave every option at the value checked below.
+func (s *TestSuite) TestParseFlagsWithPositionalArgument(c *C) {
+       const inputPath = "/tmp/somefile.csv"
+       os.Args = []string{"cmd", inputPath}
+       var parsed ConfigParams
+       parseErr := ParseFlags(&parsed)
+       c.Assert(parseErr, IsNil)
+       c.Assert(parsed.Path, Equals, "/tmp/somefile.csv")
+       c.Assert(parsed.Verbose, Equals, false)
+       c.Assert(parsed.DeactivateUnlisted, Equals, false)
+       c.Assert(parsed.UserID, Equals, "email")
+       c.Assert(parsed.CaseInsensitive, Equals, true)
+}
+
+// The optional flags must be reflected in the parsed configuration;
+// with -user-id=username, CaseInsensitive is expected to be false.
+func (s *TestSuite) TestParseFlagsWithOptionalFlags(c *C) {
+       os.Args = []string{
+               "cmd",
+               "-verbose",
+               "-deactivate-unlisted",
+               "-user-id=username",
+               "/tmp/somefile.csv",
+       }
+       var parsed ConfigParams
+       parseErr := ParseFlags(&parsed)
+       c.Assert(parseErr, IsNil)
+       c.Assert(parsed.Path, Equals, "/tmp/somefile.csv")
+       c.Assert(parsed.Verbose, Equals, true)
+       c.Assert(parsed.DeactivateUnlisted, Equals, true)
+       c.Assert(parsed.UserID, Equals, "username")
+       c.Assert(parsed.CaseInsensitive, Equals, false)
+}
+
+func (s *TestSuite) TestGetConfig(c *C) {
+       os.Args = []string{"cmd", "/tmp/somefile.csv"}
+       cfg, err := GetConfig()
+       c.Assert(err, IsNil)
+       c.Assert(cfg.AnonUserUUID, Not(Equals), "")
+       c.Assert(cfg.SysUserUUID, Not(Equals), "")
+       c.Assert(cfg.CurrentUser, Not(Equals), "")
+       c.Assert(cfg.ClusterID, Not(Equals), "")
+       c.Assert(cfg.Client, NotNil)
+}
+
+func (s *TestSuite) TestFailOnEmptyFields(c *C) {
+       records := [][]string{
+               {"", "first-name", "last-name", "1", "0"},
+               {"user@example", "", "last-name", "1", "0"},
+               {"user@example", "first-name", "", "1", "0"},
+               {"user@example", "first-name", "last-name", "", "0"},
+               {"user@example", "first-name", "last-name", "1", ""},
+       }
+       for _, record := range records {
+               data := [][]string{record}
+               tmpfile, err := MakeTempCSVFile(data)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*fields cannot be empty.*")
+       }
+}
+
+func (s *TestSuite) TestIgnoreSpaces(c *C) {
+       // Make sure users aren't already there from fixtures
+       for _, user := range s.users {
+               e := user.Email
+               found := e == "user1@example.com" || e == "user2@example.com" || e == "user3@example.com"
+               c.Assert(found, Equals, false)
+       }
+       // Use CSV data with leading/trailing whitespaces, confirm that they get ignored
+       data := [][]string{
+               {" user1@example.com", "  Example", "   User1", "1", "0"},
+               {"user2@example.com ", "Example  ", "User2   ", "1", "0"},
+               {" user3@example.com ", "  Example  ", "   User3   ", "1", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+       users, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       for _, userNr := range []int{1, 2, 3} {
+               found := false
+               for _, user := range users {
+                       if user.Email == fmt.Sprintf("user%d@example.com", userNr) &&
+                               user.LastName == fmt.Sprintf("User%d", userNr) &&
+                               user.FirstName == "Example" && user.IsActive == true {
+                               found = true
+                               break
+                       }
+               }
+               c.Assert(found, Equals, true)
+       }
+}
+
+// Error out when records have != 5 records
+func (s *TestSuite) TestWrongNumberOfFields(c *C) {
+       for _, testCase := range [][][]string{
+               {{"user1@example.com", "Example", "User1", "1"}},
+               {{"user1@example.com", "Example", "User1", "1", "0", "extra data"}},
+       } {
+               tmpfile, err := MakeTempCSVFile(testCase)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*expected 5 fields, found.*")
+       }
+}
+
+// Error out when records have incorrect data types
+func (s *TestSuite) TestWrongDataFields(c *C) {
+       for _, testCase := range [][][]string{
+               {{"user1@example.com", "Example", "User1", "yep", "0"}},
+               {{"user1@example.com", "Example", "User1", "1", "nope"}},
+       } {
+               tmpfile, err := MakeTempCSVFile(testCase)
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, NotNil)
+               c.Assert(err, ErrorMatches, ".*parsing error at line.*[active|admin] status not recognized.*")
+       }
+}
+
+// TestUserCreationAndUpdate creates, activates and deactivates users.
+//
+// For each supported user ID type ("email" and "username") it runs
+// the sync twice. The first run must create four users that are not
+// in the fixtures. The second run flips every Active flag and changes
+// the names, and must update the same users. After each run, IsAdmin
+// is expected to be true only for records that are both active and
+// admin.
+func (s *TestSuite) TestUserCreationAndUpdate(c *C) {
+       for _, tc := range []string{"email", "username"} {
+               uIDPrefix := tc
+               uIDSuffix := ""
+               if tc == "email" {
+                       uIDSuffix = "@example.com"
+               }
+               s.cfg.UserID = tc
+               records := []userRecord{{
+                       UserID:    fmt.Sprintf("%suser1%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "User1",
+                       Active:    true,
+                       Admin:     false,
+               }, {
+                       UserID:    fmt.Sprintf("%suser2%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "User2",
+                       Active:    false, // initially inactive
+                       Admin:     false,
+               }, {
+                       UserID:    fmt.Sprintf("%sadmin1%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "Admin1",
+                       Active:    true,
+                       Admin:     true,
+               }, {
+                       UserID:    fmt.Sprintf("%sadmin2%s", uIDPrefix, uIDSuffix),
+                       FirstName: "Example",
+                       LastName:  "Admin2",
+                       Active:    false, // initially inactive
+                       Admin:     true,
+               }}
+               // Make sure users aren't already there from fixtures
+               for _, user := range s.users {
+                       uID, err := GetUserID(user, s.cfg.UserID)
+                       c.Assert(err, IsNil)
+                       found := false
+                       for _, r := range records {
+                               if uID == r.UserID {
+                                       found = true
+                                       break
+                               }
+                       }
+                       c.Assert(found, Equals, false)
+               }
+               // User creation
+               tmpfile, err := MakeTempCSVFile(RecordsToStrings(records))
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, IsNil)
+
+               users, err := ListUsers(s.cfg.Client)
+               c.Assert(err, IsNil)
+               for _, r := range records {
+                       var foundUser arvados.User
+                       // Tracked explicitly: foundUser is a struct value, so
+                       // a NotNil check on it could never fail.
+                       found := false
+                       for _, user := range users {
+                               uID, err := GetUserID(user, s.cfg.UserID)
+                               c.Assert(err, IsNil)
+                               if uID == r.UserID {
+                                       // Add an @example.com email if missing
+                                       // (to avoid database reset errors)
+                                       if tc == "username" && user.Email == "" {
+                                               err := UpdateUser(s.cfg.Client, user.UUID, &user, map[string]string{
+                                                       "email": fmt.Sprintf("%s@example.com", user.Username),
+                                               })
+                                               c.Assert(err, IsNil)
+                                       }
+                                       foundUser = user
+                                       found = true
+                                       break
+                               }
+                       }
+                       c.Logf("Checking creation for user %q", r.UserID)
+                       c.Assert(found, Equals, true)
+                       c.Assert(foundUser.FirstName, Equals, r.FirstName)
+                       c.Assert(foundUser.LastName, Equals, r.LastName)
+                       c.Assert(foundUser.IsActive, Equals, r.Active)
+                       c.Assert(foundUser.IsAdmin, Equals, (r.Active && r.Admin))
+               }
+               // User update
+               for idx := range records {
+                       records[idx].Active = !records[idx].Active
+                       records[idx].FirstName = records[idx].FirstName + "Updated"
+                       records[idx].LastName = records[idx].LastName + "Updated"
+               }
+               tmpfile, err = MakeTempCSVFile(RecordsToStrings(records))
+               c.Assert(err, IsNil)
+               defer os.Remove(tmpfile.Name())
+               s.cfg.Path = tmpfile.Name()
+               err = doMain(s.cfg)
+               c.Assert(err, IsNil)
+
+               users, err = ListUsers(s.cfg.Client)
+               c.Assert(err, IsNil)
+               for _, r := range records {
+                       var foundUser arvados.User
+                       found := false
+                       for _, user := range users {
+                               uID, err := GetUserID(user, s.cfg.UserID)
+                               c.Assert(err, IsNil)
+                               if uID == r.UserID {
+                                       foundUser = user
+                                       found = true
+                                       break
+                               }
+                       }
+                       c.Logf("Checking update for user %q", r.UserID)
+                       c.Assert(found, Equals, true)
+                       c.Assert(foundUser.FirstName, Equals, r.FirstName)
+                       c.Assert(foundUser.LastName, Equals, r.LastName)
+                       c.Assert(foundUser.IsActive, Equals, r.Active)
+                       c.Assert(foundUser.IsAdmin, Equals, (r.Active && r.Admin))
+               }
+       }
+}
+
+// With DeactivateUnlisted enabled and a CSV that lists a single
+// inactive user, the sync must leave exactly three active local
+// users: system root, anonymous and the current user.
+func (s *TestSuite) TestDeactivateUnlisted(c *C) {
+       localUUIDPattern := regexp.MustCompile(fmt.Sprintf("^%s-tpzed-[0-9a-z]{15}$", s.cfg.ClusterID))
+       before, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       anonUUID := fmt.Sprintf("%s-tpzed-anonymouspublic", s.cfg.ClusterID)
+       activeBefore := 0
+       for _, usr := range before {
+               if !usr.IsActive && usr.UUID == anonUUID {
+                       // Make sure the anonymous user is active for this test
+                       var updated arvados.User
+                       err := UpdateUser(s.cfg.Client, usr.UUID, &updated, map[string]string{"is_active": "true"})
+                       c.Assert(err, IsNil)
+                       c.Assert(updated.IsActive, Equals, true)
+               }
+               if usr.IsActive && localUUIDPattern.MatchString(usr.UUID) {
+                       activeBefore++
+               }
+       }
+       // More than the three users that must stay active (system root,
+       // anonymous, current user) are needed, so that there is
+       // something to deactivate. The fixtures should provide them.
+       c.Logf("Initial active users count: %d", activeBefore)
+       c.Assert(activeBefore > 3, Equals, true)
+
+       s.cfg.DeactivateUnlisted = true
+       s.cfg.Verbose = true
+       rows := [][]string{
+               {"user1@example.com", "Example", "User1", "0", "0"},
+       }
+       csvFile, err := MakeTempCSVFile(rows)
+       c.Assert(err, IsNil)
+       defer os.Remove(csvFile.Name())
+       s.cfg.Path = csvFile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, IsNil)
+
+       after, err := ListUsers(s.cfg.Client)
+       c.Assert(err, IsNil)
+       // The only users allowed to remain active.
+       allowed := map[string]bool{
+               fmt.Sprintf("%s-tpzed-000000000000000", s.cfg.ClusterID): true,
+               fmt.Sprintf("%s-tpzed-anonymouspublic", s.cfg.ClusterID): true,
+               s.cfg.CurrentUser.UUID: true,
+       }
+       stillActive := map[string]bool{}
+       emailsSeen := map[string]bool{}
+       activeAfter := 0
+       for _, usr := range after {
+               if emailsSeen[usr.Email] {
+                       c.Errorf("Duplicated email address %q in user list (probably from fixtures). This test requires unique email addresses.", usr.Email)
+               }
+               emailsSeen[usr.Email] = true
+               if !usr.IsActive || !localUUIDPattern.MatchString(usr.UUID) {
+                       continue
+               }
+               c.Logf("Found remaining active user %q (%s)", usr.Email, usr.UUID)
+               c.Assert(allowed[usr.UUID], Equals, true)
+               stillActive[usr.UUID] = true
+               activeAfter++
+       }
+       // 3 active users remaining: System root, Anonymous and the current user.
+       c.Logf("Active local users remaining: %v", stillActive)
+       c.Assert(activeAfter, Equals, 3)
+}
+
+func (s *TestSuite) TestFailOnDuplicatedEmails(c *C) {
+       for i := range []int{1, 2} {
+               isAdmin := i == 2
+               err := CreateUser(s.cfg.Client, &arvados.User{}, map[string]string{
+                       "email":      "somedupedemail@example.com",
+                       "first_name": fmt.Sprintf("Duped %d", i),
+                       "username":   fmt.Sprintf("dupedemail%d", i),
+                       "last_name":  "User",
+                       "is_active":  "true",
+                       "is_admin":   fmt.Sprintf("%t", isAdmin),
+               })
+               c.Assert(err, IsNil)
+       }
+       s.cfg.Verbose = true
+       data := [][]string{
+               {"user1@example.com", "Example", "User1", "0", "0"},
+       }
+       tmpfile, err := MakeTempCSVFile(data)
+       c.Assert(err, IsNil)
+       defer os.Remove(tmpfile.Name())
+       s.cfg.Path = tmpfile.Name()
+       err = doMain(s.cfg)
+       c.Assert(err, NotNil)
+       c.Assert(err, ErrorMatches, "skipped.*duplicated email address.*")
+}