15531: Fed migrate script passes test cases
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Mon, 23 Sep 2019 15:32:03 +0000 (11:32 -0400)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Mon, 23 Sep 2019 15:32:03 +0000 (11:32 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

sdk/python/arvados/commands/federation_migrate.py
sdk/python/tests/fed-migrate/check.py [new file with mode: 0644]
sdk/python/tests/fed-migrate/fed-migrate.cwl
sdk/python/tests/fed-migrate/run-test.cwl
sdk/python/tests/fed-migrate/run-test.cwlex
tools/arvbox/bin/arvbox
tools/arvbox/lib/arvbox/docker/cluster-config.sh

index 664a997659b70e2436467b76427eae08b9647282..ec846d2c4aa2c7c298a2f092839de373aa1e3fb1 100755 (executable)
@@ -116,8 +116,8 @@ def main():
                     homeuuid = ""
             for a in accum:
                 r = (a["email"], a["username"], a["uuid"], loginCluster or homeuuid[0:5])
-                by_email.setdefault(a["email"], [])
-                by_email[a["email"]].append(r)
+                by_email.setdefault(a["email"], {})
+                by_email[a["email"]][a["uuid"]] = r
                 rows.append(r)
             lastemail = u["email"]
             accum = [u]
@@ -130,8 +130,8 @@ def main():
             homeuuid = ""
     for a in accum:
         r = (a["email"], a["username"], a["uuid"], loginCluster or homeuuid[0:5])
-        by_email.setdefault(a["email"], [])
-        by_email[a["email"]].append(r)
+        by_email.setdefault(a["email"], {})
+        by_email[a["email"]][a["uuid"]] = r
         rows.append(r)
 
     if args.report:
@@ -147,13 +147,13 @@ def main():
             print("Performing dry run")
 
         rows = []
-        by_email = {}
+
         with open(args.migrate or args.dry_run, "rt") as f:
             for r in csv.reader(f):
                 if r[0] == "email":
                     continue
-                by_email.setdefault(r[0], [])
-                by_email[r[0]].append(r)
+                by_email.setdefault(r[0], {})
+                by_email[r[0]][r[2]] = r
                 rows.append(r)
 
         for r in rows:
@@ -165,10 +165,22 @@ def main():
             if userhome == "":
                 print("(%s) Skipping %s, no home cluster specified" % (email, old_user_uuid))
             if old_user_uuid.startswith(userhome):
+                migratecluster = old_user_uuid[0:5]
+                migratearv = clusters[migratecluster]
+                if migratearv.users().get(uuid=old_user_uuid).execute()["username"] != username:
+                    print("(%s) Updating username of %s to '%s' on %s" % (email, old_user_uuid, username, migratecluster))
+                    if not args.dry_run:
+                        try:
+                            conflicts = migratearv.users().list(filters=[["username", "=", username]]).execute()
+                            if conflicts["items"]:
+                                migratearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute()
+                            migratearv.users().update(uuid=old_user_uuid, body={"user": {"username": username}}).execute()
+                        except arvados.errors.ApiError as e:
+                            print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, old_user_uuid, username, migratecluster, e))
                 continue
             candidates = []
             conflict = False
-            for b in by_email[email]:
+            for b in by_email[email].values():
                 if b[2].startswith(userhome):
                     candidates.append(b)
                 if b[1] != username and b[3] == userhome:
@@ -196,13 +208,11 @@ def main():
                         continue
 
                     tup = (email, username, user["uuid"], userhome)
-                    by_email[email].append(tup)
-                    candidates.append(tup)
                 else:
                     # dry run
                     tup = (email, username, "%s-tpzed-xfakexfakexfake" % (userhome[0:5]), userhome)
-                    by_email[email].append(tup)
-                    candidates.append(tup)
+                by_email[email][tup[2]] = tup
+                candidates.append(tup)
             if len(candidates) > 1:
                 print("(%s) Multiple users listed to migrate %s to %s, use full uuid" % (email, old_user_uuid, userhome))
                 continue
@@ -282,10 +292,11 @@ def main():
 
                 if newuser['username'] != username:
                     try:
-                        conflicts = migratearv.users().list(filters=[["username", "=", username]]).execute()
-                        if conflicts["items"]:
-                            migratearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute()
-                        migratearv.users().update(uuid=new_user_uuid, body={"user": {"username": username}}).execute()
+                        if not args.dry_run:
+                            conflicts = migratearv.users().list(filters=[["username", "=", username]]).execute()
+                            if conflicts["items"]:
+                                migratearv.users().update(uuid=conflicts["items"][0]["uuid"], body={"user": {"username": username+"migrate"}}).execute()
+                            migratearv.users().update(uuid=new_user_uuid, body={"user": {"username": username}}).execute()
                     except arvados.errors.ApiError as e:
                         print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, new_user_uuid, username, migratecluster, e))
 
diff --git a/sdk/python/tests/fed-migrate/check.py b/sdk/python/tests/fed-migrate/check.py
new file mode 100644 (file)
index 0000000..3927954
--- /dev/null
@@ -0,0 +1,45 @@
+import arvados
+import json
+import sys
+# Post-migration check. argv[1] names a JSON file with "arvados_api_hosts" and "superuser_tokens" lists.
+j = json.load(open(sys.argv[1]))
+# One API client per cluster: host i is paired with superuser token i (entries 0, 1 and 2 are used).
+apiA = arvados.api(host=j["arvados_api_hosts"][0], token=j["superuser_tokens"][0], insecure=True)
+apiB = arvados.api(host=j["arvados_api_hosts"][1], token=j["superuser_tokens"][1], insecure=True)
+apiC = arvados.api(host=j["arvados_api_hosts"][2], token=j["superuser_tokens"][2], insecure=True)
+
+users = apiA.users().list().execute()
+# Each cluster's user listing is expected to contain exactly 10 items.
+assert len(users["items"]) == 10
+
+by_username = {}  # username -> uuid as reported by cluster A
+
+for i in range(1, 9):  # case1 .. case8
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i):
+            found = True
+            by_username[u["username"]] = u["uuid"]
+    assert found  # cluster A must list user caseN with email caseN@test
+
+users = apiB.users().list().execute()
+assert len(users["items"]) == 10
+# Cluster B: case2 .. case8 must appear with the same uuid cluster A reported (case1 is not checked here).
+for i in range(2, 9):
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["uuid"] == by_username[u["username"]]:
+            found = True
+    assert found
+
+users = apiC.users().list().execute()
+assert len(users["items"]) == 10
+# Cluster C: same check as for cluster B.
+for i in range(2, 9):
+    found = False
+    for u in users["items"]:
+        if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["uuid"] == by_username[u["username"]]:
+            found = True
+    assert found
+
+print("Passed checks")
index a94dfb5b68527d9ff1a93cac175c17a084e6b19a..313946dd3db5c43493b4f2c9bae813284e52be01 100644 (file)
@@ -16,8 +16,8 @@ $graph:
         id: fed_migrate
         type: string
     outputs:
-      - id: report
-        outputSource: main_2/report
+      - id: report3
+        outputSource: main_2/report3
         type: File
     requirements:
       InlineJavascriptRequirement: {}
@@ -89,23 +89,22 @@ $graph:
             valueFrom: '$(inputs.superuser_tokens[0])'
         out:
           - report
+          - report2
+          - report3
+          - r
         run:
-          arguments:
-            - $(inputs.fed_migrate)
-            - '--report'
-            - report.csv
-          class: CommandLineTool
+          class: Workflow
           id: main_2_embed
           inputs:
-            - id: arvados_api_hosts
+            - id: ar
               type:
                 items: string
                 type: array
-            - id: superuser_tokens
+            - id: arvados_api_hosts
               type:
                 items: string
                 type: array
-            - id: ar
+            - id: superuser_tokens
               type:
                 items: string
                 type: array
@@ -117,14 +116,179 @@ $graph:
               type: Any
           outputs:
             - id: report
-              outputBinding:
-                glob: report.csv
+              outputSource: main_2_embed_1/report
+              type: File
+            - id: report2
+              outputSource: main_2_embed_2/report2
+              type: File
+            - id: report3
+              outputSource: main_2_embed_3/report3
+              type: File
+            - id: r
+              outputSource: main_2_embed_4/r
               type: File
           requirements:
             - class: EnvVarRequirement
               envDef:
                 ARVADOS_API_HOST: $(inputs.host)
                 ARVADOS_API_TOKEN: $(inputs.token)
+          steps:
+            - id: main_2_embed_1
+              in:
+                fed_migrate:
+                  source: fed_migrate
+                host:
+                  source: host
+                token:
+                  source: token
+              out:
+                - report
+              run:
+                arguments:
+                  - $(inputs.fed_migrate)
+                  - '--report'
+                  - report.csv
+                class: CommandLineTool
+                id: main_2_embed_1_embed
+                inputs:
+                  - id: fed_migrate
+                    type: string
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                outputs:
+                  - id: report
+                    outputBinding:
+                      glob: report.csv
+                    type: File
+                requirements:
+                  InlineJavascriptRequirement: {}
+            - id: main_2_embed_2
+              in:
+                host:
+                  source: host
+                report:
+                  source: main_2_embed_1/report
+                token:
+                  source: token
+              out:
+                - report2
+              run:
+                arguments:
+                  - sed
+                  - '-E'
+                  - 's/,(case[1-8])2?,/,\1,/g'
+                class: CommandLineTool
+                id: main_2_embed_2_embed
+                inputs:
+                  - id: report
+                    type: File
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                outputs:
+                  - id: report2
+                    outputBinding:
+                      glob: report.csv
+                    type: File
+                requirements:
+                  InlineJavascriptRequirement: {}
+                stdin: $(inputs.report.path)
+                stdout: report.csv
+            - id: main_2_embed_3
+              in:
+                fed_migrate:
+                  source: fed_migrate
+                host:
+                  source: host
+                report2:
+                  source: main_2_embed_2/report2
+                token:
+                  source: token
+              out:
+                - report3
+              run:
+                arguments:
+                  - $(inputs.fed_migrate)
+                  - '--migrate'
+                  - $(inputs.report2)
+                class: CommandLineTool
+                id: main_2_embed_3_embed
+                inputs:
+                  - id: report2
+                    type: File
+                  - id: fed_migrate
+                    type: string
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                outputs:
+                  - id: report3
+                    outputBinding:
+                      outputEval: $(inputs.report2)
+                    type: File
+                requirements:
+                  InlineJavascriptRequirement: {}
+            - id: main_2_embed_4
+              in:
+                arvados_api_hosts:
+                  source: arvados_api_hosts
+                check:
+                  default:
+                    class: File
+                    location: check.py
+                host:
+                  source: host
+                report3:
+                  source: main_2_embed_3/report3
+                superuser_tokens:
+                  source: superuser_tokens
+                token:
+                  source: token
+              out:
+                - r
+              run:
+                arguments:
+                  - python
+                  - $(inputs.check)
+                  - _script
+                class: CommandLineTool
+                id: main_2_embed_4_embed
+                inputs:
+                  - id: report3
+                    type: File
+                  - id: host
+                    type: Any
+                  - id: token
+                    type: Any
+                  - id: arvados_api_hosts
+                    type:
+                      items: string
+                      type: array
+                  - id: superuser_tokens
+                    type:
+                      items: string
+                      type: array
+                  - id: check
+                    type: File
+                outputs:
+                  - id: r
+                    outputBinding:
+                      outputEval: $(inputs.report3)
+                    type: File
+                requirements:
+                  InitialWorkDirRequirement:
+                    listing:
+                      - entry: |
+                          {
+                            "arvados_api_hosts": $(inputs.arvados_api_hosts),
+                            "superuser_tokens": $(inputs.superuser_tokens)
+                          }
+                        entryname: _script
+                  InlineJavascriptRequirement: {}
   - arguments:
       - arvbox
       - cat
@@ -177,7 +341,7 @@ $graph:
           items: string
           type: array
       - id: report
-        outputSource: run_test_3/report
+        outputSource: run_test_3/report3
         type: File
     requirements:
       InlineJavascriptRequirement: {}
@@ -368,7 +532,7 @@ $graph:
           superuser_tokens:
             source: main_2/supertok
         out:
-          - report
+          - report3
         run: '#run_test'
 cwlVersion: v1.0
 
index ea412ac8e1bd763b6e6ece068110eb0410e5e20d..623a9c11f5c2108919fb31f5502665d2aa343c0f 100644 (file)
@@ -15,8 +15,8 @@ inputs:
     id: fed_migrate
     type: string
 outputs:
-  - id: out
-    outputSource: main_2/out
+  - id: report3
+    outputSource: main_2/report3
     type: File
 requirements:
   InlineJavascriptRequirement: {}
@@ -87,24 +87,23 @@ steps:
       token:
         valueFrom: '$(inputs.superuser_tokens[0])'
     out:
-      - out
+      - report
+      - report2
+      - report3
+      - r
     run:
-      arguments:
-        - $(inputs.fed_migrate)
-        - '--report'
-        - out
-      class: CommandLineTool
+      class: Workflow
       id: main_2_embed
       inputs:
-        - id: arvados_api_hosts
+        - id: ar
           type:
             items: string
             type: array
-        - id: superuser_tokens
+        - id: arvados_api_hosts
           type:
             items: string
             type: array
-        - id: ar
+        - id: superuser_tokens
           type:
             items: string
             type: array
@@ -115,13 +114,178 @@ steps:
         - id: token
           type: Any
       outputs:
-        - id: out
-          outputBinding:
-            glob: out
+        - id: report
+          outputSource: main_2_embed_1/report
+          type: File
+        - id: report2
+          outputSource: main_2_embed_2/report2
+          type: File
+        - id: report3
+          outputSource: main_2_embed_3/report3
+          type: File
+        - id: r
+          outputSource: main_2_embed_4/r
           type: File
       requirements:
         - class: EnvVarRequirement
           envDef:
             ARVADOS_API_HOST: $(inputs.host)
             ARVADOS_API_TOKEN: $(inputs.token)
+      steps:
+        - id: main_2_embed_1
+          in:
+            fed_migrate:
+              source: fed_migrate
+            host:
+              source: host
+            token:
+              source: token
+          out:
+            - report
+          run:
+            arguments:
+              - $(inputs.fed_migrate)
+              - '--report'
+              - report.csv
+            class: CommandLineTool
+            id: main_2_embed_1_embed
+            inputs:
+              - id: fed_migrate
+                type: string
+              - id: host
+                type: Any
+              - id: token
+                type: Any
+            outputs:
+              - id: report
+                outputBinding:
+                  glob: report.csv
+                type: File
+            requirements:
+              InlineJavascriptRequirement: {}
+        - id: main_2_embed_2  # rewrites ",caseN2," / ",caseN," fields of the step-1 report to ",caseN,"
+          in:
+            host:
+              source: host
+            report:
+              source: main_2_embed_1/report
+            token:
+              source: token
+          out:
+            - report2
+          run:
+            arguments:
+              - sed
+              - '-E'
+              - 's/,(case[1-8])2?,/,\1,/g'  # \1 re-inserts the captured caseN; a bare 1 would replace the field with the literal "1"
+            class: CommandLineTool
+            id: main_2_embed_2_embed
+            inputs:
+              - id: report
+                type: File
+              - id: host
+                type: Any
+              - id: token
+                type: Any
+            outputs:
+              - id: report2
+                outputBinding:
+                  glob: report.csv
+                type: File
+            requirements:
+              InlineJavascriptRequirement: {}
+            stdin: $(inputs.report.path)  # stdin must be a file path, so take .path of the File input
+            stdout: report.csv
+        - id: main_2_embed_3  # runs the migrate script against the rewritten report (report2)
+          in:
+            fed_migrate:
+              source: fed_migrate
+            host:
+              source: host
+            report2:
+              source: main_2_embed_2/report2
+            token:
+              source: token
+          out:
+            - report3
+          run:
+            arguments:
+              - $(inputs.fed_migrate)
+              - '--migrate'
+              - $(inputs.report2)  # this tool declares report2; there is no input named report
+            class: CommandLineTool
+            id: main_2_embed_3_embed
+            inputs:
+              - id: report2
+                type: File
+              - id: fed_migrate
+                type: string
+              - id: host
+                type: Any
+              - id: token
+                type: Any
+            outputs:
+              - id: report3
+                outputBinding:
+                  outputEval: $(inputs.report2)  # the input File is passed through unchanged as report3
+                type: File
+            requirements:
+              InlineJavascriptRequirement: {}
+        - id: main_2_embed_4
+          in:
+            arvados_api_hosts:
+              source: arvados_api_hosts
+            check:
+              default:
+                class: File
+                location: check.py
+            host:
+              source: host
+            report3:
+              source: main_2_embed_3/report3
+            superuser_tokens:
+              source: superuser_tokens
+            token:
+              source: token
+          out:
+            - r
+          run:
+            arguments:
+              - python
+              - $(inputs.check)
+              - _script
+            class: CommandLineTool
+            id: main_2_embed_4_embed
+            inputs:
+              - id: report3
+                type: File
+              - id: host
+                type: Any
+              - id: token
+                type: Any
+              - id: arvados_api_hosts
+                type:
+                  items: string
+                  type: array
+              - id: superuser_tokens
+                type:
+                  items: string
+                  type: array
+              - id: check
+                type: File
+            outputs:
+              - id: r
+                outputBinding:
+                  outputEval: $(inputs.report3)
+                type: File
+            requirements:
+              InitialWorkDirRequirement:
+                listing:
+                  - entry: |
+                      {
+                        "arvados_api_hosts": $(inputs.arvados_api_hosts),
+                        "superuser_tokens": $(inputs.superuser_tokens)
+                      }
+                    entryname: _script
+              InlineJavascriptRequirement: {}
 
index 3dda1fe7fd823e3e40792efc0e9103a421a3cad3..ef37c51520e0d7c2e9319a70767d02a6eea2d9da 100644 (file)
@@ -11,13 +11,13 @@ def workflow main(
   "superuser_tokens": $(inputs.superuser_tokens)
 }
 >>>
-  return arvados_api_hosts as ar
+    return arvados_api_hosts as ar
   }
 
-  run tool(arvados_api_hosts, superuser_tokens, ar,
-           fed_migrate,
-           host=$(inputs.arvados_api_hosts[0]),
-          token=$(inputs.superuser_tokens[0])) {
+  run workflow(ar, arvados_api_hosts, superuser_tokens,
+               fed_migrate,
+              host=$(inputs.arvados_api_hosts[0]),
+              token=$(inputs.superuser_tokens[0])) {
     requirements {
       EnvVarRequirement {
         envDef: {
@@ -26,9 +26,32 @@ def workflow main(
        }
       }
     }
-    $(inputs.fed_migrate) --report report.csv
-    return File("report.csv") as report
+
+    run tool(fed_migrate, host, token) {
+      $(inputs.fed_migrate) --report report.csv
+      return File("report.csv") as report
+    }
+
+    run tool(report, host, token) {
+      sed -E 's/,(case[1-8])2?,/,\\1,/g' < $(inputs.report.path) > report.csv
+      return File("report.csv") as report2
+    }
+
+    run tool(report2, fed_migrate, host, token) {
+      $(inputs.fed_migrate) --migrate $(inputs.report2)
+      return report2 as report3
+    }
+
+    run tool(report3, host, token, arvados_api_hosts, superuser_tokens, check=File("check.py")) {
+      python $(inputs.check) <<<
+{
+  "arvados_api_hosts": $(inputs.arvados_api_hosts),
+  "superuser_tokens": $(inputs.superuser_tokens)
+}
+>>>
+    return report3 as r
+    }
   }
 
-  return report
+  return report3
 }
\ No newline at end of file
index e56fbd489be3312c0e9159379d351a75acd20630..2999d31930c64a2f35cba23bef33d3aa4697f749 100755 (executable)
@@ -242,6 +242,8 @@ run() {
         fi
         if ! test -d "$COMPOSER_ROOT" ; then
             git clone https://github.com/curoverse/composer.git "$COMPOSER_ROOT"
+            git -C "$COMPOSER_ROOT" checkout arvados-fork
+            git -C "$COMPOSER_ROOT" pull
         fi
         if ! test -d "$WORKBENCH2_ROOT" ; then
             git clone https://github.com/curoverse/arvados-workbench2.git "$WORKBENCH2_ROOT"
@@ -594,6 +596,39 @@ case "$subcmd" in
        exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados'
        ;;
 
+    checkpoint)  # pg_dump arvados_development to /var/lib/arvados/checkpoint.sql inside the container
+       exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec pg_dump --host=localhost --username=arvados --clean arvados_development > /var/lib/arvados/checkpoint.sql'
+       ;;
+
+    restore)  # feed /var/lib/arvados/checkpoint.sql back into arvados_development with psql
+       exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados --quiet --file=/var/lib/arvados/checkpoint.sql'
+       ;;
+
+    hotreset)  # stop the services, drop the database, remove the listed files under /var/lib/arvados, then start the services again
+       exec docker exec -i $ARVBOX_CONTAINER /usr/bin/env GEM_HOME=/var/lib/gems /bin/bash - <<EOF
+sv stop api
+sv stop controller
+sv stop websockets
+sv stop keepstore0
+sv stop keepstore1
+sv stop keepproxy
+cd /usr/src/arvados/services/api
+export RAILS_ENV=development
+bundle exec rake db:drop
+rm /var/lib/arvados/api_database_setup
+rm /var/lib/arvados/superuser_token
+rm /var/lib/arvados/keep0-uuid
+rm /var/lib/arvados/keep1-uuid
+rm /var/lib/arvados/keepproxy-uuid
+sv start api
+sv start controller
+sv start websockets
+sv restart keepstore0
+sv restart keepstore1
+sv restart keepproxy
+EOF
+       ;;
+
     *)
         echo "Arvados-in-a-box             https://doc.arvados.org/install/arvbox.html"
         echo
@@ -612,6 +647,8 @@ case "$subcmd" in
         echo "build   <config>   build arvbox Docker image"
         echo "reboot  <config>   stop, build arvbox Docker image, run"
         echo "rebuild <config>   build arvbox Docker image, no layer cache"
+       echo "checkpoint         create database backup"
+       echo "restore            restore checkpoint"
         echo "reset              delete arvbox arvados data (be careful!)"
         echo "destroy            delete all arvbox code and data (be careful!)"
         echo "log <service>      tail log of specified service"
index 951b592ea69d1893b6c030f539ce8e8d8bcf925e..89d1a48078e1ce707975f229239a8abbf21e190d 100755 (executable)
@@ -6,7 +6,7 @@
 exec 2>&1
 set -ex -o pipefail
 
-if [[ -s /etc/arvados/config.yml ]] ; then
+if [[ -s /etc/arvados/config.yml ]] && [[ /var/lib/arvados/cluster_config.yml.override -ot /etc/arvados/config.yml ]] ; then
    exit
 fi
 
@@ -82,7 +82,7 @@ Clusters:
       Keepproxy:
         InternalURLs:
           "http://localhost:${services[keepproxy]}/": {}
-        ExternalURL: "http://$localip:${services[keepproxy-ssl]}/"
+        ExternalURL: "https://$localip:${services[keepproxy-ssl]}/"
       Websocket:
         ExternalURL: "wss://$localip:${services[websockets-ssl]}/websocket"
         InternalURLs:
@@ -104,7 +104,7 @@ Clusters:
         InternalURLs:
           "http://localhost:${services[keep-web]}/": {}
       Composer:
-        ExternalURL: "http://$localip:${services[composer]}"
+        ExternalURL: "https://$localip:${services[composer]}"
       Controller:
         ExternalURL: "https://$localip:${services[controller-ssl]}"
         InternalURLs: