Merge branch 'master' into 16585-keep-exercise-improvements
authorWard Vandewege <ward@curii.com>
Wed, 22 Jul 2020 20:09:19 +0000 (16:09 -0400)
committerWard Vandewege <ward@curii.com>
Wed, 22 Jul 2020 20:09:34 +0000 (16:09 -0400)
refs #16585

Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

36 files changed:
.licenseignore
cmd/arvados-client/cmd.go
go.mod
go.sum
lib/controller/api/routable.go [new file with mode: 0644]
lib/controller/handler.go
lib/controller/localdb/conn.go
lib/controller/localdb/login.go
lib/controller/localdb/login_ldap_test.go
lib/controller/router/router.go
lib/ctrlctx/db.go [moved from lib/controller/localdb/db.go with 62% similarity]
lib/ctrlctx/db_test.go [moved from lib/controller/localdb/db_test.go with 62% similarity]
lib/deduplicationreport/command.go [new file with mode: 0644]
lib/deduplicationreport/report.go [new file with mode: 0644]
lib/deduplicationreport/report_test.go [new file with mode: 0644]
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/fsaccess.py
sdk/cwl/arvados_cwl/http.py
sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
sdk/cwl/tests/13976-keepref-wf.cwl
sdk/cwl/tests/16377-missing-default.cwl [new file with mode: 0644]
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/hello.yml [new file with mode: 0644]
sdk/cwl/tests/test_http.py
sdk/cwl/tests/test_submit.py
sdk/go/arvadostest/db.go [new file with mode: 0644]
sdk/python/arvados/commands/federation_migrate.py
sdk/python/tests/fed-migrate/check.py
sdk/python/tests/fed-migrate/create_users.py
tools/arvbox/bin/arvbox

index ad80dc3f4b671cc165db40fe6b215359933a0315..81f6b7181d2083ff2b84b3b5ec0e88168d58ca4b 100644 (file)
@@ -79,4 +79,6 @@ lib/dispatchcloud/test/sshkey_*
 *.asc
 sdk/java-v2/build.gradle
 sdk/java-v2/settings.gradle
-sdk/cwl/tests/wf/feddemo
\ No newline at end of file
+sdk/cwl/tests/wf/feddemo
+go.mod
+go.sum
index 887bc62bb322a7e5df7f41ab74efd9c74d82b655..bcc3dda09ac91559d4a35227ef81c95bf3e979cd 100644 (file)
@@ -9,6 +9,7 @@ import (
 
        "git.arvados.org/arvados.git/lib/cli"
        "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/lib/deduplicationreport"
        "git.arvados.org/arvados.git/lib/mount"
 )
 
@@ -52,7 +53,8 @@ var (
                "virtual_machine":          cli.APICall,
                "workflow":                 cli.APICall,
 
-               "mount": mount.Command,
+               "mount":                mount.Command,
+               "deduplication-report": deduplicationreport.Command,
        })
 )
 
diff --git a/go.mod b/go.mod
index cc5457975f54da4d6e00702a955451f104fe39d1..884d1fcdac8637e77fb349aa569ad8fcbb5b7924 100644 (file)
--- a/go.mod
+++ b/go.mod
@@ -22,6 +22,7 @@ require (
        github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1
        github.com/docker/go-connections v0.3.0 // indirect
        github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d // indirect
+       github.com/dustin/go-humanize v1.0.0
        github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 // indirect
        github.com/fsnotify/fsnotify v1.4.9
        github.com/ghodss/yaml v1.0.0
@@ -35,6 +36,7 @@ require (
        github.com/imdario/mergo v0.3.8-0.20190415133143-5ef87b449ca7
        github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
        github.com/jmcvetta/randutil v0.0.0-20150817122601-2bb1b664bcff
+       github.com/jmoiron/sqlx v1.2.0
        github.com/julienschmidt/httprouter v1.2.0
        github.com/karalabe/xgo v0.0.0-20191115072854-c5ccff8648a7 // indirect
        github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect
diff --git a/go.sum b/go.sum
index 38153ce3eaa08844dd2abfb944b9318145fbeed0..ead655c9b276164c2a7e9766ea57d7b96ef3f82a 100644 (file)
--- a/go.sum
+++ b/go.sum
@@ -56,6 +56,8 @@ github.com/docker/go-connections v0.3.0 h1:3lOnM9cSzgGwx8VfK/NGOW5fLQ0GjIlCkaktF
 github.com/docker/go-connections v0.3.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
 github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d h1:dVaNRYvaGV23AdNdsm+4y1mPN0tj3/1v6taqKMmM6Ko=
 github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
 github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ=
 github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
 github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
@@ -72,6 +74,7 @@ github.com/go-ldap/ldap v3.0.3+incompatible h1:HTeSZO8hWMS1Rgb2Ziku6b8a7qRIZZMHj
 github.com/go-ldap/ldap v3.0.3+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc=
 github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
 github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
 github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
 github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
@@ -109,6 +112,8 @@ github.com/jmcvetta/randutil v0.0.0-20150817122601-2bb1b664bcff h1:6NvhExg4omUC9
 github.com/jmcvetta/randutil v0.0.0-20150817122601-2bb1b664bcff/go.mod h1:ddfPX8Z28YMjiqoaJhNBzWHapTHXejnB5cDCUWDwriw=
 github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
 github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
+github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
+github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
@@ -121,10 +126,12 @@ github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5/go.mod h1:CT
 github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
 github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
+github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
 github.com/lib/pq v1.3.0 h1:/qkRGz8zljWiDcFvgpwUpwIAPu3r07TDvs3Rws+o/pU=
 github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
 github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c h1:ouxemItv3B/Zh008HJkEXDYCN3BIRyNHxtUN7ThJ5Js=
 github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho=
+github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
 github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
 github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
 github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 h1:eQox4Rh4ewJF+mqYPxCkmBAirRnPaHEB26UkNuPyjlk=
diff --git a/lib/controller/api/routable.go b/lib/controller/api/routable.go
new file mode 100644 (file)
index 0000000..6049cba
--- /dev/null
@@ -0,0 +1,17 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package api provides types used by controller/server-component
+// packages.
+package api
+
+import "context"
+
+// A RoutableFunc calls an API method (sometimes via a wrapped
+// RoutableFunc) that has real argument types.
+//
+// (It is used by ctrlctx to manage database transactions, so moving
+// it to the router package would cause a circular dependency
+// router->arvadostest->ctrlctx->router.)
+type RoutableFunc func(ctx context.Context, opts interface{}) (interface{}, error)
index cc06246420559479203e24843164cee281e07633..e742bbc59b08a3a01a8302fcadb2cda6042cded9 100644 (file)
@@ -6,7 +6,6 @@ package controller
 
 import (
        "context"
-       "database/sql"
        "errors"
        "fmt"
        "net/http"
@@ -16,13 +15,14 @@ import (
        "time"
 
        "git.arvados.org/arvados.git/lib/controller/federation"
-       "git.arvados.org/arvados.git/lib/controller/localdb"
        "git.arvados.org/arvados.git/lib/controller/railsproxy"
        "git.arvados.org/arvados.git/lib/controller/router"
+       "git.arvados.org/arvados.git/lib/ctrlctx"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/health"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
+       "github.com/jmoiron/sqlx"
        _ "github.com/lib/pq"
 )
 
@@ -34,7 +34,7 @@ type Handler struct {
        proxy          *proxy
        secureClient   *http.Client
        insecureClient *http.Client
-       pgdb           *sql.DB
+       pgdb           *sqlx.DB
        pgdbMtx        sync.Mutex
 }
 
@@ -87,7 +87,7 @@ func (h *Handler) setup() {
                Routes: health.Routes{"ping": func() error { _, err := h.db(context.TODO()); return err }},
        })
 
-       rtr := router.New(federation.New(h.Cluster), localdb.WrapCallsInTransactions(h.db))
+       rtr := router.New(federation.New(h.Cluster), ctrlctx.WrapCallsInTransactions(h.db))
        mux.Handle("/arvados/v1/config", rtr)
        mux.Handle("/"+arvados.EndpointUserAuthenticate.Path, rtr)
 
@@ -121,14 +121,14 @@ func (h *Handler) setup() {
 
 var errDBConnection = errors.New("database connection error")
 
-func (h *Handler) db(ctx context.Context) (*sql.DB, error) {
+func (h *Handler) db(ctx context.Context) (*sqlx.DB, error) {
        h.pgdbMtx.Lock()
        defer h.pgdbMtx.Unlock()
        if h.pgdb != nil {
                return h.pgdb, nil
        }
 
-       db, err := sql.Open("postgres", h.Cluster.PostgreSQL.Connection.String())
+       db, err := sqlx.Open("postgres", h.Cluster.PostgreSQL.Connection.String())
        if err != nil {
                ctxlog.FromContext(ctx).WithError(err).Error("postgresql connect failed")
                return nil, errDBConnection
index 60263455bdb1d02c10a9164c7c235d22a0f90fb7..4f0035edf993ad525c4d82b8d5e880049432c6c2 100644 (file)
@@ -22,11 +22,13 @@ type Conn struct {
 
 func NewConn(cluster *arvados.Cluster) *Conn {
        railsProxy := railsproxy.NewConn(cluster)
-       return &Conn{
+       var conn Conn
+       conn = Conn{
                cluster:         cluster,
                railsProxy:      railsProxy,
                loginController: chooseLoginController(cluster, railsProxy),
        }
+       return &conn
 }
 
 func (conn *Conn) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) {
index 1cd349a10eaa94d987899ac1315f811ffbf186e1..ee1ea56924c5700d25e43262347d1045d534ca5c 100644 (file)
@@ -15,6 +15,7 @@ import (
        "strings"
 
        "git.arvados.org/arvados.git/lib/controller/rpc"
+       "git.arvados.org/arvados.git/lib/ctrlctx"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/auth"
        "git.arvados.org/arvados.git/sdk/go/httpserver"
@@ -117,7 +118,7 @@ func createAPIClientAuthorization(ctx context.Context, conn *rpc.Conn, rootToken
                return
        }
        token := target.Query().Get("api_token")
-       tx, err := currenttx(ctx)
+       tx, err := ctrlctx.CurrentTx(ctx)
        if err != nil {
                return
        }
@@ -130,7 +131,7 @@ func createAPIClientAuthorization(ctx context.Context, conn *rpc.Conn, rootToken
        }
        var exp sql.NullString
        var scopes []byte
-       err = tx.QueryRowContext(ctx, "select uuid, api_token, expires_at, scopes from api_client_authorizations where api_token=$1", tokensecret).Scan(&resp.UUID, &resp.APIToken, &exp, &scopes)
+       err = tx.QueryRowxContext(ctx, "select uuid, api_token, expires_at, scopes from api_client_authorizations where api_token=$1", tokensecret).Scan(&resp.UUID, &resp.APIToken, &exp, &scopes)
        if err != nil {
                return
        }
index 64ae58bce2681f792020b1855c2465d5ad226ae1..0c94fa6c0e21be72949f6fd5b402ae252d7ce1cc 100644 (file)
@@ -6,18 +6,19 @@ package localdb
 
 import (
        "context"
-       "database/sql"
        "encoding/json"
        "net"
        "net/http"
 
        "git.arvados.org/arvados.git/lib/config"
        "git.arvados.org/arvados.git/lib/controller/railsproxy"
+       "git.arvados.org/arvados.git/lib/ctrlctx"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        "git.arvados.org/arvados.git/sdk/go/auth"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "github.com/bradleypeabody/godap"
+       "github.com/jmoiron/sqlx"
        check "gopkg.in/check.v1"
 )
 
@@ -27,11 +28,11 @@ type LDAPSuite struct {
        cluster *arvados.Cluster
        ctrl    *ldapLoginController
        ldap    *godap.LDAPServer // fake ldap server that accepts auth goodusername/goodpassword
-       db      *sql.DB
+       db      *sqlx.DB
 
        // transaction context
        ctx      context.Context
-       rollback func()
+       rollback func() error
 }
 
 func (s *LDAPSuite) TearDownSuite(c *check.C) {
@@ -91,15 +92,20 @@ func (s *LDAPSuite) SetUpSuite(c *check.C) {
                Cluster:    s.cluster,
                RailsProxy: railsproxy.NewConn(s.cluster),
        }
-       s.db = testdb(c, s.cluster)
+       s.db = arvadostest.DB(c, s.cluster)
 }
 
 func (s *LDAPSuite) SetUpTest(c *check.C) {
-       s.ctx, s.rollback = testctx(c, s.db)
+       tx, err := s.db.Beginx()
+       c.Assert(err, check.IsNil)
+       s.ctx = ctrlctx.NewWithTransaction(context.Background(), tx)
+       s.rollback = tx.Rollback
 }
 
 func (s *LDAPSuite) TearDownTest(c *check.C) {
-       s.rollback()
+       if s.rollback != nil {
+               s.rollback()
+       }
 }
 
 func (s *LDAPSuite) TestLoginSuccess(c *check.C) {
index 29c81ac5cae9ac63431e691852230a00c2335afe..2944524344e9028fa22cf0c9d18327cb39193733 100644 (file)
@@ -10,6 +10,7 @@ import (
        "net/http"
        "strings"
 
+       "git.arvados.org/arvados.git/lib/controller/api"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/auth"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
@@ -21,7 +22,7 @@ import (
 type router struct {
        mux       *mux.Router
        backend   arvados.API
-       wrapCalls func(RoutableFunc) RoutableFunc
+       wrapCalls func(api.RoutableFunc) api.RoutableFunc
 }
 
 // New returns a new router (which implements the http.Handler
@@ -32,7 +33,7 @@ type router struct {
 // the returned method is used in its place. This can be used to
 // install hooks before and after each API call and alter responses;
 // see localdb.WrapCallsInTransaction for an example.
-func New(backend arvados.API, wrapCalls func(RoutableFunc) RoutableFunc) *router {
+func New(backend arvados.API, wrapCalls func(api.RoutableFunc) api.RoutableFunc) *router {
        rtr := &router{
                mux:       mux.NewRouter(),
                backend:   backend,
@@ -42,13 +43,11 @@ func New(backend arvados.API, wrapCalls func(RoutableFunc) RoutableFunc) *router
        return rtr
 }
 
-type RoutableFunc func(ctx context.Context, opts interface{}) (interface{}, error)
-
 func (rtr *router) addRoutes() {
        for _, route := range []struct {
                endpoint    arvados.APIEndpoint
                defaultOpts func() interface{}
-               exec        RoutableFunc
+               exec        api.RoutableFunc
        }{
                {
                        arvados.EndpointConfigGet,
@@ -340,7 +339,7 @@ var altMethod = map[string]string{
        "GET":   "HEAD", // Accept HEAD at any GET route
 }
 
-func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() interface{}, exec RoutableFunc) {
+func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() interface{}, exec api.RoutableFunc) {
        methods := []string{endpoint.Method}
        if alt, ok := altMethod[endpoint.Method]; ok {
                methods = append(methods, alt)
similarity index 62%
rename from lib/controller/localdb/db.go
rename to lib/ctrlctx/db.go
index 4f64e63524469cc9e9fb987a4570772eb445fd8b..127be489df3a27e553f6aa421a6f1c40cdbdcc55 100644 (file)
@@ -2,16 +2,22 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package localdb
+package ctrlctx
 
 import (
        "context"
-       "database/sql"
        "errors"
        "sync"
 
-       "git.arvados.org/arvados.git/lib/controller/router"
+       "git.arvados.org/arvados.git/lib/controller/api"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/jmoiron/sqlx"
+       _ "github.com/lib/pq"
+)
+
+var (
+       ErrNoTransaction   = errors.New("bug: there is no transaction in this context")
+       ErrContextFinished = errors.New("refusing to start a transaction after wrapped function already returned")
 )
 
 // WrapCallsInTransactions returns a call wrapper (suitable for
@@ -20,20 +26,20 @@ import (
 //
 // The wrapper calls getdb() to get a database handle before each API
 // call.
-func WrapCallsInTransactions(getdb func(context.Context) (*sql.DB, error)) func(router.RoutableFunc) router.RoutableFunc {
-       return func(origFunc router.RoutableFunc) router.RoutableFunc {
+func WrapCallsInTransactions(getdb func(context.Context) (*sqlx.DB, error)) func(api.RoutableFunc) api.RoutableFunc {
+       return func(origFunc api.RoutableFunc) api.RoutableFunc {
                return func(ctx context.Context, opts interface{}) (_ interface{}, err error) {
-                       ctx, finishtx := starttx(ctx, getdb)
+                       ctx, finishtx := New(ctx, getdb)
                        defer finishtx(&err)
                        return origFunc(ctx, opts)
                }
        }
 }
 
-// ContextWithTransaction returns a child context in which the given
+// NewWithTransaction returns a child context in which the given
 // transaction will be used by any localdb API call that needs one.
 // The caller is responsible for calling Commit or Rollback on tx.
-func ContextWithTransaction(ctx context.Context, tx *sql.Tx) context.Context {
+func NewWithTransaction(ctx context.Context, tx *sqlx.Tx) context.Context {
        txn := &transaction{tx: tx}
        txn.setup.Do(func() {})
        return context.WithValue(ctx, contextKeyTransaction, txn)
@@ -44,26 +50,26 @@ type contextKeyT string
 var contextKeyTransaction = contextKeyT("transaction")
 
 type transaction struct {
-       tx    *sql.Tx
+       tx    *sqlx.Tx
        err   error
-       getdb func(context.Context) (*sql.DB, error)
+       getdb func(context.Context) (*sqlx.DB, error)
        setup sync.Once
 }
 
-type transactionFinishFunc func(*error)
+type finishFunc func(*error)
 
-// starttx returns a new child context that can be used with
-// currenttx(). It does not open a database transaction until the
-// first call to currenttx().
+// New returns a new child context that can be used with
+// CurrentTx(). It does not open a database transaction until the
+// first call to CurrentTx().
 //
 // The caller must eventually call the returned finishtx() func to
 // commit or rollback the transaction, if any.
 //
 //     func example(ctx context.Context) (err error) {
-//             ctx, finishtx := starttx(ctx, dber)
+//             ctx, finishtx := New(ctx, dber)
 //             defer finishtx(&err)
 //             // ...
-//             tx, err := currenttx(ctx)
+//             tx, err := CurrentTx(ctx)
 //             if err != nil {
 //                     return fmt.Errorf("example: %s", err)
 //             }
@@ -75,17 +81,17 @@ type transactionFinishFunc func(*error)
 //
 // If *err is non-nil, finishtx() rolls back the transaction, and
 // does not modify *err.
-func starttx(ctx context.Context, getdb func(context.Context) (*sql.DB, error)) (context.Context, transactionFinishFunc) {
+func New(ctx context.Context, getdb func(context.Context) (*sqlx.DB, error)) (context.Context, finishFunc) {
        txn := &transaction{getdb: getdb}
        return context.WithValue(ctx, contextKeyTransaction, txn), func(err *error) {
                txn.setup.Do(func() {
                        // Using (*sync.Once)Do() prevents a future
-                       // call to currenttx() from opening a
+                       // call to CurrentTx() from opening a
                        // transaction which would never get committed
-                       // or rolled back. If currenttx() hasn't been
+                       // or rolled back. If CurrentTx() hasn't been
                        // called before now, future calls will return
                        // this error.
-                       txn.err = errors.New("refusing to start a transaction after wrapped function already returned")
+                       txn.err = ErrContextFinished
                })
                if txn.tx == nil {
                        // we never [successfully] started a transaction
@@ -100,16 +106,16 @@ func starttx(ctx context.Context, getdb func(context.Context) (*sql.DB, error))
        }
 }
 
-func currenttx(ctx context.Context) (*sql.Tx, error) {
+func CurrentTx(ctx context.Context) (*sqlx.Tx, error) {
        txn, ok := ctx.Value(contextKeyTransaction).(*transaction)
        if !ok {
-               return nil, errors.New("bug: there is no transaction in this context")
+               return nil, ErrNoTransaction
        }
        txn.setup.Do(func() {
                if db, err := txn.getdb(ctx); err != nil {
                        txn.err = err
                } else {
-                       txn.tx, txn.err = db.Begin()
+                       txn.tx, txn.err = db.Beginx()
                }
        })
        return txn.tx, txn.err
similarity index 62%
rename from lib/controller/localdb/db_test.go
rename to lib/ctrlctx/db_test.go
index 5bab86c60289e688475efa98e6be9061936a800a..5361f13c68a4967168082b28f16ab562fce546ee 100644 (file)
@@ -2,37 +2,24 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package localdb
+package ctrlctx
 
 import (
        "context"
-       "database/sql"
        "sync"
        "sync/atomic"
+       "testing"
 
        "git.arvados.org/arvados.git/lib/config"
-       "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/jmoiron/sqlx"
        _ "github.com/lib/pq"
        check "gopkg.in/check.v1"
 )
 
-// testdb returns a DB connection for the given cluster config.
-func testdb(c *check.C, cluster *arvados.Cluster) *sql.DB {
-       db, err := sql.Open("postgres", cluster.PostgreSQL.Connection.String())
-       c.Assert(err, check.IsNil)
-       return db
-}
-
-// testctx returns a context suitable for running a test case in a new
-// transaction, and a rollback func which the caller should call after
-// the test.
-func testctx(c *check.C, db *sql.DB) (ctx context.Context, rollback func()) {
-       tx, err := db.Begin()
-       c.Assert(err, check.IsNil)
-       return ContextWithTransaction(context.Background(), tx), func() {
-               c.Check(tx.Rollback(), check.IsNil)
-       }
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
 }
 
 var _ = check.Suite(&DatabaseSuite{})
@@ -46,26 +33,28 @@ func (*DatabaseSuite) TestTransactionContext(c *check.C) {
        c.Assert(err, check.IsNil)
 
        var getterCalled int64
-       getter := func(context.Context) (*sql.DB, error) {
+       getter := func(context.Context) (*sqlx.DB, error) {
                atomic.AddInt64(&getterCalled, 1)
-               return testdb(c, cluster), nil
+               db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+               c.Assert(err, check.IsNil)
+               return db, nil
        }
        wrapper := WrapCallsInTransactions(getter)
        wrappedFunc := wrapper(func(ctx context.Context, opts interface{}) (interface{}, error) {
-               txes := make([]*sql.Tx, 20)
+               txes := make([]*sqlx.Tx, 20)
                var wg sync.WaitGroup
                for i := range txes {
                        i := i
                        wg.Add(1)
                        go func() {
-                               // Concurrent calls to currenttx(),
+                               // Concurrent calls to CurrentTx(),
                                // with different children of the same
                                // parent context, will all return the
                                // same transaction.
                                defer wg.Done()
                                ctx, cancel := context.WithCancel(ctx)
                                defer cancel()
-                               tx, err := currenttx(ctx)
+                               tx, err := CurrentTx(ctx)
                                c.Check(err, check.IsNil)
                                txes[i] = tx
                        }()
@@ -82,8 +71,8 @@ func (*DatabaseSuite) TestTransactionContext(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(getterCalled, check.Equals, int64(1))
 
-       // When a wrapped func returns without calling currenttx(),
-       // calling currenttx() later shouldn't start a new
+       // When a wrapped func returns without calling CurrentTx(),
+       // calling CurrentTx() later shouldn't start a new
        // transaction.
        var savedctx context.Context
        ok, err = wrapper(func(ctx context.Context, opts interface{}) (interface{}, error) {
@@ -92,7 +81,7 @@ func (*DatabaseSuite) TestTransactionContext(c *check.C) {
        })(context.Background(), "blah")
        c.Check(ok, check.Equals, true)
        c.Check(err, check.IsNil)
-       tx, err := currenttx(savedctx)
+       tx, err := CurrentTx(savedctx)
        c.Check(tx, check.IsNil)
        c.Check(err, check.NotNil)
 }
diff --git a/lib/deduplicationreport/command.go b/lib/deduplicationreport/command.go
new file mode 100644 (file)
index 0000000..1199bc0
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package deduplicationreport
+
+import (
+       "io"
+
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/sirupsen/logrus"
+)
+
+var Command command
+
+type command struct{}
+
+type NoPrefixFormatter struct{}
+
+func (f *NoPrefixFormatter) Format(entry *logrus.Entry) ([]byte, error) {
+       return []byte(entry.Message), nil
+}
+
+// RunCommand implements the subcommand "deduplication-report <collection> <collection> ..."
+func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       var err error
+       logger := ctxlog.New(stderr, "text", "info")
+       defer func() {
+               if err != nil {
+                       logger.WithError(err).Error("fatal")
+               }
+       }()
+
+       logger.SetFormatter(new(NoPrefixFormatter))
+
+       loader := config.NewLoader(stdin, logger)
+       loader.SkipLegacy = true
+
+       exitcode := report(prog, args, loader, logger, stdout, stderr)
+
+       return exitcode
+}
diff --git a/lib/deduplicationreport/report.go b/lib/deduplicationreport/report.go
new file mode 100644 (file)
index 0000000..8bb3fc4
--- /dev/null
@@ -0,0 +1,216 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package deduplicationreport
+
+import (
+       "flag"
+       "fmt"
+       "io"
+       "strings"
+
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+       "git.arvados.org/arvados.git/sdk/go/manifest"
+
+       "github.com/dustin/go-humanize"
+       "github.com/sirupsen/logrus"
+)
+
+func deDuplicate(inputs []string) (trimmed []string) {
+       seen := make(map[string]bool)
+       for _, uuid := range inputs {
+               if !seen[uuid] {
+                       seen[uuid] = true
+                       trimmed = append(trimmed, uuid)
+               }
+       }
+       return
+}
+
+func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitcode int, inputs []string) {
+       flags := flag.NewFlagSet("", flag.ContinueOnError)
+       flags.SetOutput(stderr)
+       flags.Usage = func() {
+               fmt.Fprintf(flags.Output(), `
+Usage:
+  %s [options ...] <collection-uuid> <collection-uuid> ...
+
+  %s [options ...] <collection-pdh>,<collection_uuid> \
+     <collection-pdh>,<collection_uuid> ...
+
+  This program analyzes the overlap in blocks used by 2 or more collections. It
+  prints a deduplication report that shows the nominal space used by the
+  collections, as well as the actual size and the amount of space that is saved
+  by Keep's deduplication.
+
+  The list of collections may be provided in two ways. A list of collection
+  uuids is sufficient. Alternatively, the PDH for each collection may also be
+  provided. This is will greatly speed up operation when the list contains
+  multiple collections with the same PDH.
+
+  Exit status will be zero if there were no errors generating the report.
+
+Example:
+
+  Use the 'arv' and 'jq' commands to get the list of the 100
+  largest collections and generate the deduplication report:
+
+  arv collection list --order 'file_size_total desc' --limit 100 | \
+    jq -r '.items[] | [.portable_data_hash,.uuid] |@csv' | \
+    tail -n+2 |sed -e 's/"//g'|tr '\n' ' ' | \
+    xargs %s
+
+Options:
+`, prog, prog, prog)
+               flags.PrintDefaults()
+       }
+       loader.SetupFlags(flags)
+       loglevel := flags.String("log-level", "info", "logging level (debug, info, ...)")
+       err := flags.Parse(args)
+       if err == flag.ErrHelp {
+               return 0, inputs
+       } else if err != nil {
+               return 2, inputs
+       }
+
+       inputs = flags.Args()
+
+       inputs = deDuplicate(inputs)
+
+       if len(inputs) < 1 {
+               logger.Errorf("Error: no collections provided")
+               flags.Usage()
+               return 2, inputs
+       }
+
+       lvl, err := logrus.ParseLevel(*loglevel)
+       if err != nil {
+               return 2, inputs
+       }
+       logger.SetLevel(lvl)
+       return
+}
+
+func blockList(collection arvados.Collection) (blocks map[string]int) {
+       blocks = make(map[string]int)
+       m := manifest.Manifest{Text: collection.ManifestText}
+       blockChannel := m.BlockIterWithDuplicates()
+       for b := range blockChannel {
+               blocks[b.Digest.String()] = b.Size
+       }
+       return
+}
+
+func report(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int) {
+
+       var inputs []string
+       exitcode, inputs = parseFlags(prog, args, loader, logger, stderr)
+       if exitcode != 0 {
+               return
+       }
+
+       // Arvados Client setup
+       arv, err := arvadosclient.MakeArvadosClient()
+       if err != nil {
+               logger.Errorf("Error creating Arvados object: %s", err)
+               exitcode = 1
+               return
+       }
+
+       type Col struct {
+               FileSizeTotal int64
+               FileCount     int64
+       }
+
+       blocks := make(map[string]map[string]int)
+       pdhs := make(map[string]Col)
+       var nominalSize int64
+
+       for _, input := range inputs {
+               var uuid string
+               var pdh string
+               if strings.Contains(input, ",") {
+                       // The input is in the format pdh,uuid. This will allow us to save time on duplicate pdh's
+                       tmp := strings.Split(input, ",")
+                       pdh = tmp[0]
+                       uuid = tmp[1]
+               } else {
+                       // The input must be a plain uuid
+                       uuid = input
+               }
+               if !strings.Contains(uuid, "-4zz18-") {
+                       logger.Errorf("Error: uuid must refer to collection object")
+                       exitcode = 1
+                       return
+               }
+               if _, ok := pdhs[pdh]; ok {
+                       // We've processed a collection with this pdh already. Simply add its
+                       // size to the totals and move on to the next one.
+                       // Note that we simply trust the PDH matches the collection UUID here,
+                       // in other words, we use it over the UUID. If they don't match, the report
+                       // will be wrong.
+                       nominalSize += pdhs[pdh].FileSizeTotal
+               } else {
+                       var collection arvados.Collection
+                       err = arv.Get("collections", uuid, nil, &collection)
+                       if err != nil {
+                               logger.Errorf("Error: unable to retrieve collection: %s", err)
+                               exitcode = 1
+                               return
+                       }
+                       blocks[uuid] = make(map[string]int)
+                       blocks[uuid] = blockList(collection)
+                       if pdh != "" && collection.PortableDataHash != pdh {
+                               logger.Errorf("Error: the collection with UUID %s has PDH %s, but a different PDH was provided in the arguments: %s", uuid, collection.PortableDataHash, pdh)
+                               exitcode = 1
+                               return
+                       }
+                       if pdh == "" {
+                               pdh = collection.PortableDataHash
+                       }
+
+                       col := Col{}
+                       if collection.FileSizeTotal != 0 || collection.FileCount != 0 {
+                               nominalSize += collection.FileSizeTotal
+                               col.FileSizeTotal = collection.FileSizeTotal
+                               col.FileCount = int64(collection.FileCount)
+                       } else {
+                               // Collections created with old Arvados versions do not always have the total file size and count cached in the collections object
+                               var collSize int64
+                               for _, size := range blocks[uuid] {
+                                       collSize += int64(size)
+                               }
+                               nominalSize += collSize
+                               col.FileSizeTotal = collSize
+                       }
+                       pdhs[pdh] = col
+               }
+
+               if pdhs[pdh].FileCount != 0 {
+                       fmt.Fprintf(stdout, "Collection %s: pdh %s; nominal size %d (%s); file count %d\n", uuid, pdh, pdhs[pdh].FileSizeTotal, humanize.IBytes(uint64(pdhs[pdh].FileSizeTotal)), pdhs[pdh].FileCount)
+               } else {
+                       fmt.Fprintf(stdout, "Collection %s: pdh %s; nominal size %d (%s)\n", uuid, pdh, pdhs[pdh].FileSizeTotal, humanize.IBytes(uint64(pdhs[pdh].FileSizeTotal)))
+               }
+       }
+
+       var totalSize int64
+       seen := make(map[string]bool)
+       for _, v := range blocks {
+               for pdh, size := range v {
+                       if !seen[pdh] {
+                               seen[pdh] = true
+                               totalSize += int64(size)
+                       }
+               }
+       }
+       fmt.Fprintln(stdout)
+       fmt.Fprintf(stdout, "Collections:                 %15d\n", len(inputs))
+       fmt.Fprintf(stdout, "Nominal size of stored data: %15d bytes (%s)\n", nominalSize, humanize.IBytes(uint64(nominalSize)))
+       fmt.Fprintf(stdout, "Actual size of stored data:  %15d bytes (%s)\n", totalSize, humanize.IBytes(uint64(totalSize)))
+       fmt.Fprintf(stdout, "Saved by Keep deduplication: %15d bytes (%s)\n", nominalSize-totalSize, humanize.IBytes(uint64(nominalSize-totalSize)))
+
+       return exitcode
+}
diff --git a/lib/deduplicationreport/report_test.go b/lib/deduplicationreport/report_test.go
new file mode 100644 (file)
index 0000000..a4ed466
--- /dev/null
@@ -0,0 +1,119 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package deduplicationreport
+
+import (
+       "bytes"
+       "testing"
+
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
+var _ = check.Suite(&Suite{})
+
+type Suite struct{}
+
+func (s *Suite) TearDownSuite(c *check.C) {
+       // Undo any changes/additions to the database so they don't affect subsequent tests.
+       arvadostest.ResetEnv()
+}
+
+func (*Suite) TestUsage(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       exitcode := Command.RunCommand("deduplicationreport.test", []string{"-log-level=debug"}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 2)
+       c.Check(stdout.String(), check.Equals, "")
+       c.Log(stderr.String())
+       c.Check(stderr.String(), check.Matches, `(?ms).*Usage:.*`)
+}
+
+func (*Suite) TestTwoIdenticalUUIDs(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run dedupreport with 2 identical uuids
+       exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.FooCollection}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+1.*")
+       c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+       c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+       c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
+       c.Log(stderr.String())
+}
+
+func (*Suite) TestTwoUUIDsInvalidPDH(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run dedupreport with pdh,uuid where pdh does not match
+       exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooAndBarFilesInDirPDH + "," + arvadostest.FooCollection, arvadostest.FooCollection}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 1)
+       c.Check(stdout.String(), check.Equals, "")
+       c.Log(stderr.String())
+       c.Check(stderr.String(), check.Matches, `(?ms).*Error: the collection with UUID zzzzz-4zz18-fy296fx3hot09f7 has PDH 1f4b0bc7583c2a7f9102c395f4ffc5e3\+45, but a different PDH was provided in the arguments: 870369fc72738603c2fad16664e50e2d\+58.*`)
+}
+
+func (*Suite) TestNonExistentCollection(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run dedupreport with many UUIDs
+       exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.NonexistentCollection}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 1)
+       c.Check(stdout.String(), check.Equals, "Collection zzzzz-4zz18-fy296fx3hot09f7: pdh 1f4b0bc7583c2a7f9102c395f4ffc5e3+45; nominal size 3 (3 B)\n")
+       c.Log(stderr.String())
+       c.Check(stderr.String(), check.Matches, `(?ms).*Error: unable to retrieve collection:.*404 Not Found.*`)
+}
+
+func (*Suite) TestManyUUIDsNoOverlap(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Run dedupreport with 5 UUIDs
+       exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.HelloWorldCollection, arvadostest.FooBarDirCollection, arvadostest.WazVersion1Collection, arvadostest.UserAgreementCollection}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+5.*")
+       c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
+       c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
+       c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
+       c.Log(stderr.String())
+       c.Check(stderr.String(), check.Equals, "")
+}
+
+func (*Suite) TestTwoOverlappingCollections(c *check.C) {
+       var stdout, stderr bytes.Buffer
+       // Create two collections
+       arv := arvados.NewClientFromEnv()
+
+       var c1 arvados.Collection
+       err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:foo\n"}})
+       c.Assert(err, check.Equals, nil)
+
+       var c2 arvados.Collection
+       err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4 d3b07384d113edec49eaa6238ad5ff00+4 0:4:bar 4:4:foo\n"}})
+       c.Assert(err, check.Equals, nil)
+
+       for _, trial := range []struct {
+               field1 string
+               field2 string
+       }{
+               {
+                       // Run dedupreport with 2 arguments: uuid uuid
+                       field1: c1.UUID,
+                       field2: c2.UUID,
+               },
+               {
+                       // Run dedupreport with 2 arguments: pdh,uuid uuid
+                       field1: c1.PortableDataHash + "," + c1.UUID,
+                       field2: c2.UUID,
+               },
+       } {
+               exitcode := Command.RunCommand("deduplicationreport.test", []string{trial.field1, trial.field2}, &bytes.Buffer{}, &stdout, &stderr)
+               c.Check(exitcode, check.Equals, 0)
+               c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
+               c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
+               c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
+               c.Log(stderr.String())
+               c.Check(stderr.String(), check.Equals, "")
+       }
+}
index adbce90d8d4215329d46eebbe06be66d1f71de43..341929454ad71a87912e566dfdd86153f7f0881e 100644 (file)
@@ -176,7 +176,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
 
     parser.add_argument("--enable-dev", action="store_true",
                         help="Enable loading and running development versions "
-                             "of CWL spec.", default=False)
+                             "of the CWL standards.", default=False)
     parser.add_argument('--storage-classes', default="default",
                         help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
 
@@ -202,6 +202,14 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     parser.add_argument("--http-timeout", type=int,
                         default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
 
+    parser.add_argument(
+        "--skip-schemas",
+        action="store_true",
+        help="Skip loading of schemas",
+        default=False,
+        dest="skip_schemas",
+    )
+
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--trash-intermediate", action="store_true",
                         default=False, dest="trash_intermediate",
index 2b55ce9df5afa6b4a5e7d98ded954be50ae40aa0..fb23c2ccf73df514923f4fd0041814c6e8751833 100644 (file)
@@ -150,21 +150,28 @@ class ArvadosContainer(JobBase):
                 with Perf(metrics, "createfiles %s" % self.name):
                     for f, p in sorteditems:
                         if not p.target:
-                            pass
-                        elif p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
+                            continue
+
+                        if p.target.startswith("/"):
+                            dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
+                        else:
+                            dst = p.target
+
+                        if p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
                             if p.resolved.startswith("_:"):
-                                vwd.mkdirs(p.target)
+                                vwd.mkdirs(dst)
                             else:
                                 source, path = self.arvrunner.fs_access.get_collection(p.resolved)
-                                vwd.copy(path or ".", p.target, source_collection=source)
+                                vwd.copy(path or ".", dst, source_collection=source)
                         elif p.type == "CreateFile":
                             if self.arvrunner.secret_store.has_secret(p.resolved):
-                                secret_mounts["%s/%s" % (self.outdir, p.target)] = {
+                                mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
+                                secret_mounts[mountpoint] = {
                                     "kind": "text",
                                     "content": self.arvrunner.secret_store.retrieve(p.resolved)
                                 }
                             else:
-                                with vwd.open(p.target, "w") as n:
+                                with vwd.open(dst, "w") as n:
                                     n.write(p.resolved)
 
                 def keepemptydirs(p):
@@ -191,10 +198,14 @@ class ArvadosContainer(JobBase):
                     if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
                         (prev is not None and p.target.startswith(prev))):
                         continue
-                    mountpoint = "%s/%s" % (self.outdir, p.target)
+                    if p.target.startswith("/"):
+                        dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
+                    else:
+                        dst = p.target
+                    mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
                     mounts[mountpoint] = {"kind": "collection",
                                           "portable_data_hash": vwd.portable_data_hash(),
-                                          "path": p.target}
+                                          "path": dst}
                     if p.type.startswith("Writable"):
                         mounts[mountpoint]["writable"] = True
                     prev = p.target + "/"
@@ -316,6 +327,7 @@ class ArvadosContainer(JobBase):
                 logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
         except Exception:
             logger.exception("%s got an error", self.arvrunner.label(self))
+            logger.debug("Container request was %s", container_request)
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
index 704edaccb903eb83f1e66c983eb007fe1c4f8711..a9361a85f9fe66a5260a64b6719fa74c28cdeee2 100644 (file)
@@ -57,7 +57,7 @@ class ArvadosCommandTool(CommandLineTool):
                                  "/keep/%s/%s")
 
     def job(self, joborder, output_callback, runtimeContext):
-        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
+        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
 
         if runtimeContext.work_api == "containers":
index ddd3c00764c7b0fb42fe97adf50622bd1b23e9cb..97c5fafe792fc06ce099e6a9bc6934671ace580d 100644 (file)
@@ -141,7 +141,8 @@ class ArvadosWorkflowStep(WorkflowStep):
         runtimeContext = runtimeContext.copy()
         runtimeContext.toplevel = True  # Preserve behavior for #13365
 
-        builder = make_builder({shortname(k): v for k,v in viewitems(joborder)}, self.hints, self.requirements, runtimeContext)
+        builder = make_builder({shortname(k): v for k,v in viewitems(joborder)}, self.hints, self.requirements,
+                               runtimeContext, self.metadata)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
         return super(ArvadosWorkflowStep, self).job(joborder, output_callback, runtimeContext)
 
@@ -161,7 +162,7 @@ class ArvadosWorkflow(Workflow):
 
     def job(self, joborder, output_callback, runtimeContext):
 
-        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
+        builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
 
         req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
index ec91eea6aa807eaea1f37012b0fa1f04d21a1f1f..e8d1347ddfeec7545b8ab9740de38b78b55b4e75 100644 (file)
@@ -507,7 +507,7 @@ The 'jobs' API is no longer supported.
                                               }).execute(num_retries=self.num_retries)
             except Exception:
                 logger.exception("Setting container output")
-                return
+                raise
 
     def apply_reqs(self, job_order_object, tool):
         if "https://w3id.org/cwl/cwl#requirements" in job_order_object:
index bc2c5e34d7b6c2737cc8bdcb541fc1daf394d9ae..4688e65a3748348b8068bd27e1d9e78aa5a5e9de 100644 (file)
@@ -148,6 +148,11 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
                 return False
             else:
                 raise
+        except IOError as err:
+            if err.errno == errno.ENOENT:
+                return False
+            else:
+                raise
         if collection is not None:
             if rest:
                 return collection.exists(rest)
index 47a304372c58a27ecde8d8c13bb55d6435f9cf79..dcc2a51192dfc4d4b573da302b3373fd08d67fff 100644 (file)
@@ -16,6 +16,7 @@ import arvados.collection
 import urllib.parse
 import logging
 import calendar
+import urllib.parse
 
 logger = logging.getLogger('arvados.cwl-runner')
 
@@ -148,7 +149,9 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
                     logger.info("%d downloaded, %3.2f MiB/s", count, (bps / (1024*1024)))
                 checkpoint = loopnow
 
-    c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid, ensure_unique_name=True)
+
+    collectionname = "Downloaded from %s" % urllib.parse.quote(url, safe='')
+    c.save_new(name=collectionname, owner_uuid=project_uuid, ensure_unique_name=True)
 
     api.collections().update(uuid=c.manifest_locator(), body={"collection":{"properties": properties}}).execute()
 
index 4cd204f7df83ba49197f2cdb6ab2a61673a40b28..5bad290773be9f49ef2e87b10b2dac48e70ef75b 100644 (file)
@@ -285,6 +285,7 @@ class StagingPathMapper(PathMapper):
     def visit(self, obj, stagedir, basedir, copy=False, staged=False):
         # type: (Dict[unicode, Any], unicode, unicode, bool) -> None
         loc = obj["location"]
+        stagedir = obj.get("dirname") or stagedir
         tgt = os.path.join(stagedir, obj["basename"])
         basetgt, baseext = os.path.splitext(tgt)
 
index 71e499ebcab0cca29ccbee7a350cfbbb5aaa6e19..b10f02d1401b9e31014eb30b32e18adfdcb394d2 100644 (file)
@@ -83,7 +83,7 @@ def find_defaults(d, op):
             for i in viewvalues(d):
                 find_defaults(i, op)
 
-def make_builder(joborder, hints, requirements, runtimeContext):
+def make_builder(joborder, hints, requirements, runtimeContext, metadata):
     return Builder(
                  job=joborder,
                  files=[],               # type: List[Dict[Text, Text]]
@@ -106,6 +106,7 @@ def make_builder(joborder, hints, requirements, runtimeContext):
                  outdir="",              # type: Text
                  tmpdir="",              # type: Text
                  stagedir="",            # type: Text
+                 cwlVersion=metadata.get("http://commonwl.org/cwltool#original_cwlVersion") or metadata.get("cwlVersion")
                 )
 
 def search_schemadef(name, reqs):
@@ -172,7 +173,10 @@ def set_secondary(fsaccess, builder, inputschema, secondaryspec, primary, discov
         specs = []
         primary["secondaryFiles"] = secondaryspec
         for i, sf in enumerate(aslist(secondaryspec)):
-            pattern = builder.do_eval(sf["pattern"], context=primary)
+            if builder.cwlVersion == "v1.0":
+                pattern = builder.do_eval(sf, context=primary)
+            else:
+                pattern = builder.do_eval(sf["pattern"], context=primary)
             if pattern is None:
                 continue
             if isinstance(pattern, list):
@@ -263,6 +267,8 @@ def upload_dependencies(arvrunner, name, document_loader,
         # that external references in $include and $mixin are captured.
         scanobj = loadref("", workflowobj["id"])
 
+    metadata = scanobj
+
     sc_result = scandeps(uri, scanobj,
                   loadref_fields,
                   set(("$include", "$schemas", "location")),
@@ -354,7 +360,8 @@ def upload_dependencies(arvrunner, name, document_loader,
         builder = make_builder(builder_job_order,
                                obj.get("hints", []),
                                obj.get("requirements", []),
-                               ArvRuntimeContext())
+                               ArvRuntimeContext(),
+                               metadata)
         discover_secondary_files(arvrunner.fs_access,
                                  builder,
                                  obj["inputs"],
@@ -516,7 +523,8 @@ def upload_job_order(arvrunner, name, tool, job_order):
     builder = make_builder(builder_job_order,
                            tool.hints,
                            tool.requirements,
-                           ArvRuntimeContext())
+                           ArvRuntimeContext(),
+                           tool.metadata)
     # Now update job_order with secondaryFiles
     discover_secondary_files(arvrunner.fs_access,
                              builder,
index 40ee679857f4429b0e32cf491339e144695489de..d703fcbc55fdec7889108ca01c898365117733cd 100644 (file)
@@ -39,8 +39,8 @@ setup(name='arvados-cwl-runner',
       # file to determine what version of cwltool and schema-salad to
       # build.
       install_requires=[
-          'cwltool==3.0.20200530110633',
-          'schema-salad==6.0.20200601095207',
+          'cwltool==3.0.20200720165847',
+          'schema-salad==7.0.20200612160654',
           'arvados-python-client{}'.format(pysdk_dep),
           'setuptools',
           'ciso8601 >= 2.0.0'
index 7aa7b0aa43c06a0ae6e2d6615541de0cf428f94a..8d0dee971a89901c216e1223870662e49eb7a7e0 100644 (file)
@@ -1,11 +1,18 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 cwlVersion: v1.0
 class: CommandLineTool
 requirements:
   - class: InlineJavascriptRequirement
+  - class: ShellCommandRequirement
 arguments:
+  - cd
+  - $(inputs.hello.dirname)
+  - {shellQuote: false, valueFrom: "&&"}
   - ls
-  - -l
-  - $(inputs.hello)
+stdout: hello.out
 inputs:
   hello:
     type: File
@@ -14,4 +21,8 @@ inputs:
       location: keep:4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt
     secondaryFiles:
       - .idx
-outputs: []
\ No newline at end of file
+outputs:
+  out:
+    type: File
+    outputBinding:
+      glob: hello.out
diff --git a/sdk/cwl/tests/16377-missing-default.cwl b/sdk/cwl/tests/16377-missing-default.cwl
new file mode 100644 (file)
index 0000000..b8208e6
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  - class: InlineJavascriptRequirement
+  - class: ShellCommandRequirement
+arguments:
+  - cd
+  - $(inputs.hello.dirname)
+  - {shellQuote: false, valueFrom: "&&"}
+  - ls
+stdout: hello.out
+inputs:
+  hello:
+    type: File
+    default:
+      class: File
+      location: keep:ffffffffffffffffffffffffffaaaaaa+69/hello.txt
+    secondaryFiles:
+      - .idx
+outputs:
+  out:
+    type: File
+    outputBinding:
+      glob: hello.out
index c4c0968756a46b04ad8b201cbc66241fb4d6826d..a46decd9616cff63fe932e2320568b14c563b3b6 100644 (file)
 
 - job: null
   output:
-    out: null
+    "out": {
+        "location": "hello.out",
+        "class": "File",
+        "checksum": "sha1$ec5d3976351abab45a483a49ce714a8430cb203a",
+        "size": 24
+    }
   tool: 13976-keepref-wf.cwl
   doc: "Test issue 13976"
 
   }
   tool: 16169-no-listing-hint.cwl
   doc: "Test cwltool:LoadListingRequirement propagation"
+
+- job: hello.yml
+  output:
+    "out": {
+        "location": "hello.out",
+        "class": "File",
+        "checksum": "sha1$ec5d3976351abab45a483a49ce714a8430cb203a",
+        "size": 24
+    }
+  tool: 16377-missing-default.cwl
+  doc: "Test issue 16377 - missing default fails even when it should be overridden by valid input"
diff --git a/sdk/cwl/tests/hello.yml b/sdk/cwl/tests/hello.yml
new file mode 100644 (file)
index 0000000..e7a324e
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+hello:
+  class: File
+  location: keep:4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt
index 4119fee383e27bcfe30a97d3de754d1879c067a9..650b5f0598514bbe9fd5ea0de96ab848d2375ad0 100644 (file)
@@ -61,7 +61,7 @@ class TestHttpToKeep(unittest.TestCase):
         getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
 
         cm.open.assert_called_with("file1.txt", "wb")
-        cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
+        cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
                                        owner_uuid=None, ensure_unique_name=True)
 
         api.collections().update.assert_has_calls([
@@ -189,7 +189,7 @@ class TestHttpToKeep(unittest.TestCase):
         getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
 
         cm.open.assert_called_with("file1.txt", "wb")
-        cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
+        cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
                                        owner_uuid=None, ensure_unique_name=True)
 
         api.collections().update.assert_has_calls([
@@ -280,7 +280,7 @@ class TestHttpToKeep(unittest.TestCase):
         getmock.assert_called_with("http://example.com/download?fn=/file1.txt", stream=True, allow_redirects=True)
 
         cm.open.assert_called_with("file1.txt", "wb")
-        cm.save_new.assert_called_with(name="Downloaded from http://example.com/download?fn=/file1.txt",
+        cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
                                        owner_uuid=None, ensure_unique_name=True)
 
         api.collections().update.assert_has_calls([
index 562664c698b34df87ec2f19eb64d867ec5461698..0698db70ff68534ba70aa4176c5487f308cf2559 100644 (file)
@@ -527,9 +527,12 @@ class TestSubmit(unittest.TestCase):
 
     @mock.patch("arvados_cwl.task_queue.TaskQueue")
     @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
-    @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection", return_value = (None, None))
+    @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
     @stubs
     def test_storage_classes_correctly_propagate_to_make_output_collection(self, stubs, make_output, job, tq):
+        final_output_c = arvados.collection.Collection()
+        make_output.return_value = ({},final_output_c)
+
         def set_final_output(job_order, output_callback, runtimeContext):
             output_callback("zzzzz-4zz18-zzzzzzzzzzzzzzzz", "success")
             return []
@@ -538,16 +541,19 @@ class TestSubmit(unittest.TestCase):
         exited = arvados_cwl.main(
             ["--debug", "--local", "--storage-classes=foo",
                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         make_output.assert_called_with(u'Output of submit_wf.cwl', ['foo'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
         self.assertEqual(exited, 0)
 
     @mock.patch("arvados_cwl.task_queue.TaskQueue")
     @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
-    @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection", return_value = (None, None))
+    @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
     @stubs
     def test_default_storage_classes_correctly_propagate_to_make_output_collection(self, stubs, make_output, job, tq):
+        final_output_c = arvados.collection.Collection()
+        make_output.return_value = ({},final_output_c)
+
         def set_final_output(job_order, output_callback, runtimeContext):
             output_callback("zzzzz-4zz18-zzzzzzzzzzzzzzzz", "success")
             return []
@@ -556,7 +562,7 @@ class TestSubmit(unittest.TestCase):
         exited = arvados_cwl.main(
             ["--debug", "--local",
                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         make_output.assert_called_with(u'Output of submit_wf.cwl', ['default'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
         self.assertEqual(exited, 0)
@@ -1103,7 +1109,10 @@ class TestSubmit(unittest.TestCase):
                                 "outputs": [
                                     {
                                         "id": "#secret_job.cwl/out",
-                                        "type": "stdout"
+                                        "type": "File",
+                                        "outputBinding": {
+                                              "glob": "hashed_example.txt"
+                                        }
                                     }
                                 ],
                                 "stdout": "hashed_example.txt",
@@ -1312,7 +1321,7 @@ class TestSubmit(unittest.TestCase):
                     stubs.capture_stdout, capture_stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
                 self.assertEqual(exited, 1)
-                self.assertRegexpMatches(
+                self.assertRegex(
                     re.sub(r'[ \n]+', ' ', capture_stderr.getvalue()),
                     r"Expected collection uuid zzzzz-4zz18-zzzzzzzzzzzzzzz to be 99999999999999999999999999999998\+99 but API server reported 99999999999999999999999999999997\+99")
             finally:
@@ -1335,7 +1344,7 @@ class TestSubmit(unittest.TestCase):
 
         try:
             self.assertEqual(exited, 1)
-            self.assertRegexpMatches(
+            self.assertRegex(
                 capture_stderr.getvalue(),
                 r"Collection uuid zzzzz-4zz18-zzzzzzzzzzzzzzz not found")
         finally:
diff --git a/sdk/go/arvadostest/db.go b/sdk/go/arvadostest/db.go
new file mode 100644 (file)
index 0000000..41ecfac
--- /dev/null
@@ -0,0 +1,33 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvadostest
+
+import (
+       "context"
+
+       "git.arvados.org/arvados.git/lib/ctrlctx"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "github.com/jmoiron/sqlx"
+       _ "github.com/lib/pq"
+       "gopkg.in/check.v1"
+)
+
+// DB returns a DB connection for the given cluster config.
+func DB(c *check.C, cluster *arvados.Cluster) *sqlx.DB {
+       db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+       c.Assert(err, check.IsNil)
+       return db
+}
+
+// TransactionContext returns a context suitable for running a test
+// case in a new transaction, and a rollback func which the caller
+// should call after the test.
+func TransactionContext(c *check.C, db *sqlx.DB) (ctx context.Context, rollback func()) {
+       tx, err := db.Beginx()
+       c.Assert(err, check.IsNil)
+       return ctrlctx.NewWithTransaction(context.Background(), tx), func() {
+               c.Check(tx.Rollback(), check.IsNil)
+       }
+}
index 445775ccedcd1f4ef246297c22a17e470e0f0e94..5c1bb29e764c549d1612bb13f2890a076866fc74 100755 (executable)
@@ -22,6 +22,7 @@ import hmac
 import urllib.parse
 import os
 import hashlib
+import re
 from arvados._version import __version__
 
 EMAIL=0
@@ -169,19 +170,20 @@ def read_migrations(args, by_email, by_username):
 
 def update_username(args, email, user_uuid, username, migratecluster, migratearv):
     print("(%s) Updating username of %s to '%s' on %s" % (email, user_uuid, username, migratecluster))
-    if not args.dry_run:
-        try:
-            conflicts = migratearv.users().list(filters=[["username", "=", username]], bypass_federation=True).execute()
-            if conflicts["items"]:
-                # There's already a user with the username, move the old user out of the way
-                migratearv.users().update(uuid=conflicts["items"][0]["uuid"],
-                                          bypass_federation=True,
-                                          body={"user": {"username": username+"migrate"}}).execute()
-            migratearv.users().update(uuid=user_uuid,
-                                      bypass_federation=True,
-                                      body={"user": {"username": username}}).execute()
-        except arvados.errors.ApiError as e:
-            print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e))
+    if args.dry_run:
+        return
+    try:
+        conflicts = migratearv.users().list(filters=[["username", "=", username]], bypass_federation=True).execute()
+        if conflicts["items"]:
+            # There's already a user with the username, move the old user out of the way
+            migratearv.users().update(uuid=conflicts["items"][0]["uuid"],
+                                        bypass_federation=True,
+                                        body={"user": {"username": username+"migrate"}}).execute()
+        migratearv.users().update(uuid=user_uuid,
+                                    bypass_federation=True,
+                                    body={"user": {"username": username}}).execute()
+    except arvados.errors.ApiError as e:
+        print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e))
 
 
 def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters):
@@ -212,11 +214,17 @@ def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, cl
                 conflicts = homearv.users().list(filters=[["username", "=", username]],
                                                  bypass_federation=True).execute()
                 if conflicts["items"]:
-                    homearv.users().update(uuid=conflicts["items"][0]["uuid"],
-                                           bypass_federation=True,
-                                           body={"user": {"username": username+"migrate"}}).execute()
-                user = homearv.users().create(body={"user": {"email": email, "username": username,
-                                                             "is_active": olduser["is_active"]}}).execute()
+                    homearv.users().update(
+                        uuid=conflicts["items"][0]["uuid"],
+                        bypass_federation=True,
+                        body={"user": {"username": username+"migrate"}}).execute()
+                user = homearv.users().create(
+                    body={"user": {
+                        "email": email,
+                        "first_name": olduser["first_name"],
+                        "last_name": olduser["last_name"],
+                        "username": username,
+                        "is_active": olduser["is_active"]}}).execute()
             except arvados.errors.ApiError as e:
                 print("(%s) Could not create user: %s" % (email, str(e)))
                 return None
@@ -271,7 +279,7 @@ def activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_us
             newuser = arvados.api(host=ru.netloc, token=salted,
                                   insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute()
         else:
-            newuser = {"is_active": True, "username": username}
+            newuser = {"is_active": True, "username": email.split('@')[0], "is_admin": False}
     except arvados.errors.ApiError as e:
         print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e))
         return None
@@ -287,39 +295,48 @@ def activate_remote_user(args, email, homearv, migratearv, old_user_uuid, new_us
             return None
 
     if olduser["is_admin"] and not newuser["is_admin"]:
-        print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
+        print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s. Please ensure the user admin status is the same on both clusters. Note that a federated admin account has admin privileges on the entire federation." % (email, old_user_uuid, new_user_uuid, migratecluster))
         return None
 
     return newuser
 
 def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid):
+    if args.dry_run:
+        return
     try:
-        if not args.dry_run:
+        new_owner_uuid = new_user_uuid
+        if args.data_into_subproject:
             grp = migratearv.groups().create(body={
                 "owner_uuid": new_user_uuid,
                 "name": "Migrated from %s (%s)" % (email, old_user_uuid),
                 "group_class": "project"
             }, ensure_unique_name=True).execute()
-            migratearv.users().merge(old_user_uuid=old_user_uuid,
-                                     new_user_uuid=new_user_uuid,
-                                     new_owner_uuid=grp["uuid"],
-                                     redirect_to_new_user=True).execute()
+            new_owner_uuid = grp["uuid"]
+        migratearv.users().merge(old_user_uuid=old_user_uuid,
+                                    new_user_uuid=new_user_uuid,
+                                    new_owner_uuid=new_owner_uuid,
+                                    redirect_to_new_user=True).execute()
     except arvados.errors.ApiError as e:
-        print("(%s) Error migrating user: %s" % (email, e))
+        name_collision = re.search(r'Key \(owner_uuid, name\)=\((.*?), (.*?)\) already exists\.\n.*UPDATE "(.*?)"', e._get_reason())
+        if name_collision:
+            target_owner, rsc_name, rsc_type = name_collision.groups()
+            print("(%s) Cannot migrate to %s because both origin and target users have a %s named '%s'. Please rename the conflicting items or use --data-into-subproject to migrate all users' data into a special subproject." % (email, target_owner, rsc_type[:-1], rsc_name))
+        else:
+            print("(%s) Skipping user migration because of error: %s" % (email, e))
 
 
 def main():
-
     parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
     parser.add_argument(
         '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
         help='Print version and exit.')
-    parser.add_argument('--tokens', type=str, required=False)
+    parser.add_argument('--tokens', type=str, metavar='FILE', required=False, help="Read tokens from FILE. Not needed when using LoginCluster.")
+    parser.add_argument('--data-into-subproject', action="store_true", help="Migrate user's data into a separate subproject. This can be used to avoid name collisions from within an account.")
     group = parser.add_mutually_exclusive_group(required=True)
-    group.add_argument('--report', type=str, help="Generate report .csv file listing users by email address and their associated Arvados accounts")
-    group.add_argument('--migrate', type=str, help="Consume report .csv and migrate users to designated Arvados accounts")
-    group.add_argument('--dry-run', type=str, help="Consume report .csv and report how user would be migrated to designated Arvados accounts")
-    group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected")
+    group.add_argument('--report', type=str, metavar='FILE', help="Generate report .csv file listing users by email address and their associated Arvados accounts.")
+    group.add_argument('--migrate', type=str, metavar='FILE', help="Consume report .csv and migrate users to designated Arvados accounts.")
+    group.add_argument('--dry-run', type=str, metavar='FILE', help="Consume report .csv and report how user would be migrated to designated Arvados accounts.")
+    group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected.")
     args = parser.parse_args()
 
     clusters, errors, loginCluster = connect_clusters(args)
index a2c0096165c74b7bc1fda0daf212177cb4d08ac2..c231cc0735795cae9577f9e52f7e5f4bae449bb3 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import arvados
 import json
 import sys
@@ -21,7 +25,7 @@ def check_A(users):
     for i in range(1, 10):
         found = False
         for u in users["items"]:
-            if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i):
+            if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser":
                 found = True
                 by_username[u["username"]] = u["uuid"]
         assert found
@@ -60,6 +64,7 @@ for i in range(2, 9):
     found = False
     for u in users["items"]:
         if (u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and
+            u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser" and
             u["uuid"] == by_username[u["username"]] and u["is_active"] is True):
             found = True
     assert found, "Not found case%i" % i
@@ -67,6 +72,7 @@ for i in range(2, 9):
 found = False
 for u in users["items"]:
     if (u["username"] == "case9" and u["email"] == "case9@test" and
+        u["first_name"] == "Case9" and u["last_name"] == "Testuser" and
         u["uuid"] == by_username[u["username"]] and u["is_active"] is False):
         found = True
 assert found
@@ -87,6 +93,7 @@ for i in (2, 4, 6, 7, 8):
     found = False
     for u in users["items"]:
         if (u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and
+            u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser" and
             u["uuid"] == by_username[u["username"]] and u["is_active"] is True):
             found = True
     assert found
@@ -97,6 +104,7 @@ for i in (3, 5, 9):
     found = False
     for u in users["items"]:
         if (u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and
+            u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser" and
             u["uuid"] == by_username[u["username"]] and u["is_active"] is True):
             found = True
     assert not found
index cea624ec4c4e2290635e3949c97135b2c4c992c2..0b5732293d0982fb6f158366c0c2aa894f1674ab 100644 (file)
@@ -1,3 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
 import arvados
 import json
 import sys
@@ -11,13 +15,21 @@ apiC = arvados.api(host=j["arvados_api_hosts"][2], token=j["superuser_tokens"][2
 def maketoken(newtok):
     return 'v2/' + newtok["uuid"] + '/' + newtok["api_token"]
 
+def get_user_data(case_nr, is_active=True):
+    return {
+        "email": "case{}@test".format(case_nr),
+        "first_name": "Case{}".format(case_nr),
+        "last_name": "Testuser",
+        "is_active": is_active
+    }
+
 # case 1
 # user only exists on cluster A
-apiA.users().create(body={"user": {"email": "case1@test", "is_active": True}}).execute()
+apiA.users().create(body={"user": get_user_data(case_nr=1)}).execute()
 
 # case 2
 # user exists on cluster A and has remotes on B and C
-case2 = apiA.users().create(body={"user": {"email": "case2@test", "is_active": True}}).execute()
+case2 = apiA.users().create(body={"user": get_user_data(case_nr=2)}).execute()
 newtok = apiA.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case2["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtok), insecure=True).users().current().execute()
@@ -25,11 +37,11 @@ arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtok), insecure=Tr
 
 # case 3
 # user only exists on cluster B
-case3 = apiB.users().create(body={"user": {"email": "case3@test", "is_active": True}}).execute()
+case3 = apiB.users().create(body={"user": get_user_data(case_nr=3)}).execute()
 
 # case 4
 # user only exists on cluster B and has remotes on A and C
-case4 = apiB.users().create(body={"user": {"email": "case4@test", "is_active": True}}).execute()
+case4 = apiB.users().create(body={"user": get_user_data(case_nr=4)}).execute()
 newtok = apiB.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case4["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtok), insecure=True).users().current().execute()
@@ -38,18 +50,18 @@ arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtok), insecure=Tr
 
 # case 5
 # user exists on both cluster A and B
-case5 = apiA.users().create(body={"user": {"email": "case5@test", "is_active": True}}).execute()
-case5 = apiB.users().create(body={"user": {"email": "case5@test", "is_active": True}}).execute()
+case5 = apiA.users().create(body={"user": get_user_data(case_nr=5)}).execute()
+case5 = apiB.users().create(body={"user": get_user_data(case_nr=5)}).execute()
 
 # case 6
 # user exists on both cluster A and B, with remotes on A, B and C
-case6_A = apiA.users().create(body={"user": {"email": "case6@test", "is_active": True}}).execute()
+case6_A = apiA.users().create(body={"user": get_user_data(case_nr=6)}).execute()
 newtokA = apiA.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case6_A["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokA), insecure=True).users().current().execute()
 arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokA), insecure=True).users().current().execute()
 
-case6_B = apiB.users().create(body={"user": {"email": "case6@test", "is_active": True}}).execute()
+case6_B = apiB.users().create(body={"user": get_user_data(case_nr=6)}).execute()
 newtokB = apiB.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case6_B["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
@@ -57,13 +69,13 @@ arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=T
 
 # case 7
 # user exists on both cluster B and A, with remotes on A, B and C
-case7_B = apiB.users().create(body={"user": {"email": "case7@test", "is_active": True}}).execute()
+case7_B = apiB.users().create(body={"user": get_user_data(case_nr=7)}).execute()
 newtokB = apiB.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case7_B["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
 arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
 
-case7_A = apiA.users().create(body={"user": {"email": "case7@test", "is_active": True}}).execute()
+case7_A = apiA.users().create(body={"user": get_user_data(case_nr=7)}).execute()
 newtokA = apiA.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case7_A["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokA), insecure=True).users().current().execute()
@@ -71,13 +83,13 @@ arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokA), insecure=T
 
 # case 8
 # user exists on both cluster B and C, with remotes on A, B and C
-case8_B = apiB.users().create(body={"user": {"email": "case8@test", "is_active": True}}).execute()
+case8_B = apiB.users().create(body={"user": get_user_data(case_nr=8)}).execute()
 newtokB = apiB.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case8_B["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
 arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
 
-case8_C = apiC.users().create(body={"user": {"email": "case8@test", "is_active": True}}).execute()
+case8_C = apiC.users().create(body={"user": get_user_data(case_nr=8)}).execute()
 newtokC = apiC.api_client_authorizations().create(body={
     "api_client_authorization": {'owner_uuid': case8_C["uuid"]}}).execute()
 arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokC), insecure=True).users().current().execute()
@@ -85,4 +97,4 @@ arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokC), insecure=T
 
 # case 9
 # user only exists on cluster B, but is inactive
-case9 = apiB.users().create(body={"user": {"email": "case9@test", "is_active": False}}).execute()
+case9 = apiB.users().create(body={"user": get_user_data(case_nr=9, is_active=False)}).execute()
index 59aca1e5b4cabbc4f1f20117d9e1d76f474dc826..292a4fd746a6697fafa0bda5155e766bab79618d 100755 (executable)
@@ -194,7 +194,7 @@ run() {
             localip=$(ip addr show $defaultdev | grep 'inet ' | sed 's/ *inet \(.*\)\/.*/\1/')
         fi
        echo "Public arvbox will use address $localip"
-        iptemp=$(tempfile)
+        iptemp=$(mktemp)
         echo $localip > $iptemp
         chmod og+r $iptemp
         PUBLIC="--volume=$iptemp:/var/run/localip_override