*.asc
sdk/java-v2/build.gradle
sdk/java-v2/settings.gradle
-sdk/cwl/tests/wf/feddemo
\ No newline at end of file
+sdk/cwl/tests/wf/feddemo
+go.mod
+go.sum
"git.arvados.org/arvados.git/lib/cli"
"git.arvados.org/arvados.git/lib/cmd"
+ "git.arvados.org/arvados.git/lib/deduplicationreport"
"git.arvados.org/arvados.git/lib/mount"
)
"virtual_machine": cli.APICall,
"workflow": cli.APICall,
- "mount": mount.Command,
+ "mount": mount.Command,
+ "deduplication-report": deduplicationreport.Command,
})
)
github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1
github.com/docker/go-connections v0.3.0 // indirect
github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d // indirect
+ github.com/dustin/go-humanize v1.0.0
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 // indirect
github.com/fsnotify/fsnotify v1.4.9
github.com/ghodss/yaml v1.0.0
github.com/imdario/mergo v0.3.8-0.20190415133143-5ef87b449ca7
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/jmcvetta/randutil v0.0.0-20150817122601-2bb1b664bcff
+ github.com/jmoiron/sqlx v1.2.0
github.com/julienschmidt/httprouter v1.2.0
github.com/karalabe/xgo v0.0.0-20191115072854-c5ccff8648a7 // indirect
github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect
github.com/docker/go-connections v0.3.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec=
github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d h1:dVaNRYvaGV23AdNdsm+4y1mPN0tj3/1v6taqKMmM6Ko=
github.com/docker/go-units v0.3.3-0.20171221200356-d59758554a3d/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
+github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4zYo=
+github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568 h1:BHsljHzVlRcyQhjrss6TZTdY2VfCqZPbv5k3iBFa2ZQ=
github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc=
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/go-ldap/ldap v3.0.3+incompatible/go.mod h1:qfd9rJvER9Q0/D/Sqn1DfHRoBp40uXYvFoEVrNEPqRc=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
+github.com/go-sql-driver/mysql v1.4.0/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/jmcvetta/randutil v0.0.0-20150817122601-2bb1b664bcff/go.mod h1:ddfPX8Z28YMjiqoaJhNBzWHapTHXejnB5cDCUWDwriw=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af h1:pmfjZENx5imkbgOkpRUYLnmbU7UEFbjtDA2hxJ1ichM=
github.com/jmespath/go-jmespath v0.0.0-20180206201540-c2b33e8439af/go.mod h1:Nht3zPeWKUH0NzdCt2Blrr5ys8VGpn0CEB0cQHVjt7k=
+github.com/jmoiron/sqlx v1.2.0 h1:41Ip0zITnmWNR/vHV+S4m+VoUivnWY5E4OJfLZjCJMA=
+github.com/jmoiron/sqlx v1.2.0/go.mod h1:1FEQNm3xlJgrMD+FBdI9+xvCksHtbpVBBw5dYhBSsks=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
+github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/lib/pq v1.3.0 h1:/qkRGz8zljWiDcFvgpwUpwIAPu3r07TDvs3Rws+o/pU=
github.com/lib/pq v1.3.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c h1:ouxemItv3B/Zh008HJkEXDYCN3BIRyNHxtUN7ThJ5Js=
github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c/go.mod h1:74gB1z2wpxxInTG6yaqA7KrtM0NZ+RbrcqDvYHefzho=
+github.com/mattn/go-sqlite3 v1.9.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 h1:eQox4Rh4ewJF+mqYPxCkmBAirRnPaHEB26UkNuPyjlk=
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package api provides types used by controller/server-component
+// packages.
+package api
+
+import "context"
+
+// A RoutableFunc calls an API method (sometimes via a wrapped
+// RoutableFunc) that has real argument types.
+//
+// (It is used by ctrlctx to manage database transactions, so moving
+// it to the router package would cause a circular dependency
+// router->arvadostest->ctrlctx->router.)
+//
+// Wrappers (e.g., ctrlctx.WrapCallsInTransactions) accept a
+// RoutableFunc and return a new RoutableFunc that adds behavior
+// around the inner call.
+type RoutableFunc func(ctx context.Context, opts interface{}) (interface{}, error)
import (
"context"
- "database/sql"
"errors"
"fmt"
"net/http"
"time"
"git.arvados.org/arvados.git/lib/controller/federation"
- "git.arvados.org/arvados.git/lib/controller/localdb"
"git.arvados.org/arvados.git/lib/controller/railsproxy"
"git.arvados.org/arvados.git/lib/controller/router"
+ "git.arvados.org/arvados.git/lib/ctrlctx"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/health"
"git.arvados.org/arvados.git/sdk/go/httpserver"
+ "github.com/jmoiron/sqlx"
_ "github.com/lib/pq"
)
proxy *proxy
secureClient *http.Client
insecureClient *http.Client
- pgdb *sql.DB
+ pgdb *sqlx.DB
pgdbMtx sync.Mutex
}
Routes: health.Routes{"ping": func() error { _, err := h.db(context.TODO()); return err }},
})
- rtr := router.New(federation.New(h.Cluster), localdb.WrapCallsInTransactions(h.db))
+ rtr := router.New(federation.New(h.Cluster), ctrlctx.WrapCallsInTransactions(h.db))
mux.Handle("/arvados/v1/config", rtr)
mux.Handle("/"+arvados.EndpointUserAuthenticate.Path, rtr)
var errDBConnection = errors.New("database connection error")
-func (h *Handler) db(ctx context.Context) (*sql.DB, error) {
+func (h *Handler) db(ctx context.Context) (*sqlx.DB, error) {
h.pgdbMtx.Lock()
defer h.pgdbMtx.Unlock()
if h.pgdb != nil {
return h.pgdb, nil
}
- db, err := sql.Open("postgres", h.Cluster.PostgreSQL.Connection.String())
+ db, err := sqlx.Open("postgres", h.Cluster.PostgreSQL.Connection.String())
if err != nil {
ctxlog.FromContext(ctx).WithError(err).Error("postgresql connect failed")
return nil, errDBConnection
func NewConn(cluster *arvados.Cluster) *Conn {
railsProxy := railsproxy.NewConn(cluster)
- return &Conn{
+ var conn Conn
+ conn = Conn{
cluster: cluster,
railsProxy: railsProxy,
loginController: chooseLoginController(cluster, railsProxy),
}
+ return &conn
}
func (conn *Conn) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) {
"strings"
"git.arvados.org/arvados.git/lib/controller/rpc"
+ "git.arvados.org/arvados.git/lib/ctrlctx"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/httpserver"
return
}
token := target.Query().Get("api_token")
- tx, err := currenttx(ctx)
+ tx, err := ctrlctx.CurrentTx(ctx)
if err != nil {
return
}
}
var exp sql.NullString
var scopes []byte
- err = tx.QueryRowContext(ctx, "select uuid, api_token, expires_at, scopes from api_client_authorizations where api_token=$1", tokensecret).Scan(&resp.UUID, &resp.APIToken, &exp, &scopes)
+ err = tx.QueryRowxContext(ctx, "select uuid, api_token, expires_at, scopes from api_client_authorizations where api_token=$1", tokensecret).Scan(&resp.UUID, &resp.APIToken, &exp, &scopes)
if err != nil {
return
}
import (
"context"
- "database/sql"
"encoding/json"
"net"
"net/http"
"git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/controller/railsproxy"
+ "git.arvados.org/arvados.git/lib/ctrlctx"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"github.com/bradleypeabody/godap"
+ "github.com/jmoiron/sqlx"
check "gopkg.in/check.v1"
)
cluster *arvados.Cluster
ctrl *ldapLoginController
ldap *godap.LDAPServer // fake ldap server that accepts auth goodusername/goodpassword
- db *sql.DB
+ db *sqlx.DB
// transaction context
ctx context.Context
- rollback func()
+ rollback func() error
}
func (s *LDAPSuite) TearDownSuite(c *check.C) {
Cluster: s.cluster,
RailsProxy: railsproxy.NewConn(s.cluster),
}
- s.db = testdb(c, s.cluster)
+ s.db = arvadostest.DB(c, s.cluster)
}
func (s *LDAPSuite) SetUpTest(c *check.C) {
- s.ctx, s.rollback = testctx(c, s.db)
+ tx, err := s.db.Beginx()
+ c.Assert(err, check.IsNil)
+ s.ctx = ctrlctx.NewWithTransaction(context.Background(), tx)
+ s.rollback = tx.Rollback
}
func (s *LDAPSuite) TearDownTest(c *check.C) {
- s.rollback()
+ if s.rollback != nil {
+ s.rollback()
+ }
}
func (s *LDAPSuite) TestLoginSuccess(c *check.C) {
"net/http"
"strings"
+ "git.arvados.org/arvados.git/lib/controller/api"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
type router struct {
mux *mux.Router
backend arvados.API
- wrapCalls func(RoutableFunc) RoutableFunc
+ wrapCalls func(api.RoutableFunc) api.RoutableFunc
}
// New returns a new router (which implements the http.Handler
// the returned method is used in its place. This can be used to
// install hooks before and after each API call and alter responses;
// see localdb.WrapCallsInTransaction for an example.
-func New(backend arvados.API, wrapCalls func(RoutableFunc) RoutableFunc) *router {
+func New(backend arvados.API, wrapCalls func(api.RoutableFunc) api.RoutableFunc) *router {
rtr := &router{
mux: mux.NewRouter(),
backend: backend,
return rtr
}
-type RoutableFunc func(ctx context.Context, opts interface{}) (interface{}, error)
-
func (rtr *router) addRoutes() {
for _, route := range []struct {
endpoint arvados.APIEndpoint
defaultOpts func() interface{}
- exec RoutableFunc
+ exec api.RoutableFunc
}{
{
arvados.EndpointConfigGet,
"GET": "HEAD", // Accept HEAD at any GET route
}
-func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() interface{}, exec RoutableFunc) {
+func (rtr *router) addRoute(endpoint arvados.APIEndpoint, defaultOpts func() interface{}, exec api.RoutableFunc) {
methods := []string{endpoint.Method}
if alt, ok := altMethod[endpoint.Method]; ok {
methods = append(methods, alt)
//
// SPDX-License-Identifier: AGPL-3.0
-package localdb
+package ctrlctx
import (
"context"
- "database/sql"
"errors"
"sync"
- "git.arvados.org/arvados.git/lib/controller/router"
+ "git.arvados.org/arvados.git/lib/controller/api"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/jmoiron/sqlx"
+ _ "github.com/lib/pq"
+)
+
+var (
+ ErrNoTransaction = errors.New("bug: there is no transaction in this context")
+ ErrContextFinished = errors.New("refusing to start a transaction after wrapped function already returned")
)
// WrapCallsInTransactions returns a call wrapper (suitable for
//
// The wrapper calls getdb() to get a database handle before each API
// call.
-func WrapCallsInTransactions(getdb func(context.Context) (*sql.DB, error)) func(router.RoutableFunc) router.RoutableFunc {
- return func(origFunc router.RoutableFunc) router.RoutableFunc {
+func WrapCallsInTransactions(getdb func(context.Context) (*sqlx.DB, error)) func(api.RoutableFunc) api.RoutableFunc {
+ return func(origFunc api.RoutableFunc) api.RoutableFunc {
return func(ctx context.Context, opts interface{}) (_ interface{}, err error) {
- ctx, finishtx := starttx(ctx, getdb)
+ ctx, finishtx := New(ctx, getdb)
defer finishtx(&err)
return origFunc(ctx, opts)
}
}
}
-// ContextWithTransaction returns a child context in which the given
+// NewWithTransaction returns a child context in which the given
// transaction will be used by any localdb API call that needs one.
// The caller is responsible for calling Commit or Rollback on tx.
-func ContextWithTransaction(ctx context.Context, tx *sql.Tx) context.Context {
+func NewWithTransaction(ctx context.Context, tx *sqlx.Tx) context.Context {
txn := &transaction{tx: tx}
txn.setup.Do(func() {})
return context.WithValue(ctx, contextKeyTransaction, txn)
var contextKeyTransaction = contextKeyT("transaction")
type transaction struct {
- tx *sql.Tx
+ tx *sqlx.Tx
err error
- getdb func(context.Context) (*sql.DB, error)
+ getdb func(context.Context) (*sqlx.DB, error)
setup sync.Once
}
-type transactionFinishFunc func(*error)
+type finishFunc func(*error)
-// starttx returns a new child context that can be used with
-// currenttx(). It does not open a database transaction until the
-// first call to currenttx().
+// New returns a new child context that can be used with
+// CurrentTx(). It does not open a database transaction until the
+// first call to CurrentTx().
//
// The caller must eventually call the returned finishtx() func to
// commit or rollback the transaction, if any.
//
// func example(ctx context.Context) (err error) {
-// ctx, finishtx := starttx(ctx, dber)
+// ctx, finishtx := New(ctx, dber)
// defer finishtx(&err)
// // ...
-// tx, err := currenttx(ctx)
+// tx, err := CurrentTx(ctx)
// if err != nil {
// return fmt.Errorf("example: %s", err)
// }
//
// If *err is non-nil, finishtx() rolls back the transaction, and
// does not modify *err.
-func starttx(ctx context.Context, getdb func(context.Context) (*sql.DB, error)) (context.Context, transactionFinishFunc) {
+func New(ctx context.Context, getdb func(context.Context) (*sqlx.DB, error)) (context.Context, finishFunc) {
txn := &transaction{getdb: getdb}
return context.WithValue(ctx, contextKeyTransaction, txn), func(err *error) {
txn.setup.Do(func() {
// Using (*sync.Once)Do() prevents a future
- // call to currenttx() from opening a
+ // call to CurrentTx() from opening a
// transaction which would never get committed
- // or rolled back. If currenttx() hasn't been
+ // or rolled back. If CurrentTx() hasn't been
// called before now, future calls will return
// this error.
- txn.err = errors.New("refusing to start a transaction after wrapped function already returned")
+ txn.err = ErrContextFinished
})
if txn.tx == nil {
// we never [successfully] started a transaction
}
}
-func currenttx(ctx context.Context) (*sql.Tx, error) {
+func CurrentTx(ctx context.Context) (*sqlx.Tx, error) {
txn, ok := ctx.Value(contextKeyTransaction).(*transaction)
if !ok {
- return nil, errors.New("bug: there is no transaction in this context")
+ return nil, ErrNoTransaction
}
txn.setup.Do(func() {
if db, err := txn.getdb(ctx); err != nil {
txn.err = err
} else {
- txn.tx, txn.err = db.Begin()
+ txn.tx, txn.err = db.Beginx()
}
})
return txn.tx, txn.err
//
// SPDX-License-Identifier: AGPL-3.0
-package localdb
+package ctrlctx
import (
"context"
- "database/sql"
"sync"
"sync/atomic"
+ "testing"
"git.arvados.org/arvados.git/lib/config"
- "git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/jmoiron/sqlx"
_ "github.com/lib/pq"
check "gopkg.in/check.v1"
)
-// testdb returns a DB connection for the given cluster config.
-func testdb(c *check.C, cluster *arvados.Cluster) *sql.DB {
- db, err := sql.Open("postgres", cluster.PostgreSQL.Connection.String())
- c.Assert(err, check.IsNil)
- return db
-}
-
-// testctx returns a context suitable for running a test case in a new
-// transaction, and a rollback func which the caller should call after
-// the test.
-func testctx(c *check.C, db *sql.DB) (ctx context.Context, rollback func()) {
- tx, err := db.Begin()
- c.Assert(err, check.IsNil)
- return ContextWithTransaction(context.Background(), tx), func() {
- c.Check(tx.Rollback(), check.IsNil)
- }
+// Gocheck boilerplate
+func Test(t *testing.T) {
+ check.TestingT(t)
}
var _ = check.Suite(&DatabaseSuite{})
c.Assert(err, check.IsNil)
var getterCalled int64
- getter := func(context.Context) (*sql.DB, error) {
+ getter := func(context.Context) (*sqlx.DB, error) {
atomic.AddInt64(&getterCalled, 1)
- return testdb(c, cluster), nil
+ db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+ c.Assert(err, check.IsNil)
+ return db, nil
}
wrapper := WrapCallsInTransactions(getter)
wrappedFunc := wrapper(func(ctx context.Context, opts interface{}) (interface{}, error) {
- txes := make([]*sql.Tx, 20)
+ txes := make([]*sqlx.Tx, 20)
var wg sync.WaitGroup
for i := range txes {
i := i
wg.Add(1)
go func() {
- // Concurrent calls to currenttx(),
+ // Concurrent calls to CurrentTx(),
// with different children of the same
// parent context, will all return the
// same transaction.
defer wg.Done()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
- tx, err := currenttx(ctx)
+ tx, err := CurrentTx(ctx)
c.Check(err, check.IsNil)
txes[i] = tx
}()
c.Check(err, check.IsNil)
c.Check(getterCalled, check.Equals, int64(1))
- // When a wrapped func returns without calling currenttx(),
- // calling currenttx() later shouldn't start a new
+ // When a wrapped func returns without calling CurrentTx(),
+ // calling CurrentTx() later shouldn't start a new
// transaction.
var savedctx context.Context
ok, err = wrapper(func(ctx context.Context, opts interface{}) (interface{}, error) {
})(context.Background(), "blah")
c.Check(ok, check.Equals, true)
c.Check(err, check.IsNil)
- tx, err := currenttx(savedctx)
+ tx, err := CurrentTx(savedctx)
c.Check(tx, check.IsNil)
c.Check(err, check.NotNil)
}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package deduplicationreport
+
+import (
+	"io"
+
+	"git.arvados.org/arvados.git/lib/config"
+	"git.arvados.org/arvados.git/sdk/go/ctxlog"
+	"github.com/sirupsen/logrus"
+)
+
+// Command is the entry point registered in the CLI command table as
+// "deduplication-report".
+var Command command
+
+type command struct{}
+
+// NoPrefixFormatter is a logrus formatter that emits only the bare
+// message, with no timestamp, level, or field prefix.
+type NoPrefixFormatter struct{}
+
+// Format implements logrus.Formatter.
+//
+// NOTE(review): no trailing newline is appended, so consecutive log
+// entries may run together on one line — confirm this is intended.
+func (f *NoPrefixFormatter) Format(entry *logrus.Entry) ([]byte, error) {
+	return []byte(entry.Message), nil
+}
+
+// RunCommand implements the subcommand "deduplication-report <collection> <collection> ..."
+//
+// It sets up a logger and config loader, delegates the real work to
+// report(), and returns that function's exit code.
+func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+	var err error
+	logger := ctxlog.New(stderr, "text", "info")
+	// Report a fatal error if any code path below assigns err before
+	// returning (none does yet; this is a safety net).
+	defer func() {
+		if err != nil {
+			logger.WithError(err).Error("fatal")
+		}
+	}()
+
+	logger.SetFormatter(new(NoPrefixFormatter))
+
+	loader := config.NewLoader(stdin, logger)
+	// Only load the modern cluster config; skip legacy config files.
+	loader.SkipLegacy = true
+
+	exitcode := report(prog, args, loader, logger, stdout, stderr)
+
+	return exitcode
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package deduplicationreport
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "strings"
+
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/manifest"
+
+ "github.com/dustin/go-humanize"
+ "github.com/sirupsen/logrus"
+)
+
+// deDuplicate returns the input list with duplicate entries removed,
+// preserving the order of first occurrence.
+func deDuplicate(inputs []string) (trimmed []string) {
+	seen := make(map[string]bool)
+	for _, uuid := range inputs {
+		if !seen[uuid] {
+			seen[uuid] = true
+			trimmed = append(trimmed, uuid)
+		}
+	}
+	return
+}
+
+// parseFlags parses command line arguments and config-loader flags.
+// It returns a non-zero exitcode on error (2 for usage errors, 0 when
+// -help was requested) together with the de-duplicated list of
+// positional collection arguments.
+func parseFlags(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stderr io.Writer) (exitcode int, inputs []string) {
+	flags := flag.NewFlagSet("", flag.ContinueOnError)
+	flags.SetOutput(stderr)
+	flags.Usage = func() {
+		fmt.Fprintf(flags.Output(), `
+Usage:
+  %s [options ...] <collection-uuid> <collection-uuid> ...
+
+  %s [options ...] <collection-pdh>,<collection_uuid> \
+  <collection-pdh>,<collection_uuid> ...
+
+  This program analyzes the overlap in blocks used by 2 or more collections. It
+  prints a deduplication report that shows the nominal space used by the
+  collections, as well as the actual size and the amount of space that is saved
+  by Keep's deduplication.
+
+  The list of collections may be provided in two ways. A list of collection
+  uuids is sufficient. Alternatively, the PDH for each collection may also be
+  provided. This will greatly speed up operation when the list contains
+  multiple collections with the same PDH.
+
+  Exit status will be zero if there were no errors generating the report.
+
+Example:
+
+  Use the 'arv' and 'jq' commands to get the list of the 100
+  largest collections and generate the deduplication report:
+
+  arv collection list --order 'file_size_total desc' --limit 100 | \
+  jq -r '.items[] | [.portable_data_hash,.uuid] |@csv' | \
+  tail -n+2 |sed -e 's/"//g'|tr '\n' ' ' | \
+  xargs %s
+
+Options:
+`, prog, prog, prog)
+		flags.PrintDefaults()
+	}
+	loader.SetupFlags(flags)
+	loglevel := flags.String("log-level", "info", "logging level (debug, info, ...)")
+	err := flags.Parse(args)
+	if err == flag.ErrHelp {
+		return 0, inputs
+	}
+	if err != nil {
+		return 2, inputs
+	}
+
+	inputs = flags.Args()
+
+	inputs = deDuplicate(inputs)
+
+	if len(inputs) < 1 {
+		logger.Errorf("Error: no collections provided")
+		flags.Usage()
+		return 2, inputs
+	}
+
+	lvl, err := logrus.ParseLevel(*loglevel)
+	if err != nil {
+		// Previously this failed silently; tell the user why we exit.
+		logger.Errorf("Error: invalid log level: %s", err)
+		return 2, inputs
+	}
+	logger.SetLevel(lvl)
+	return
+}
+
+// blockList returns a map of block digest -> block size covering
+// every block referenced by the collection's manifest. Duplicate
+// references within the manifest collapse to a single map entry.
+func blockList(collection arvados.Collection) (blocks map[string]int) {
+	blocks = make(map[string]int)
+	m := manifest.Manifest{Text: collection.ManifestText}
+	blockChannel := m.BlockIterWithDuplicates()
+	for b := range blockChannel {
+		blocks[b.Digest.String()] = b.Size
+	}
+	return
+}
+
+// report generates the deduplication report for the collections named
+// in args and writes it to stdout. It returns 0 on success, 1 on
+// runtime errors (non-collection UUID, retrieval failure, PDH
+// mismatch), or the non-zero code from argument parsing.
+func report(prog string, args []string, loader *config.Loader, logger *logrus.Logger, stdout, stderr io.Writer) (exitcode int) {
+
+	var inputs []string
+	exitcode, inputs = parseFlags(prog, args, loader, logger, stderr)
+	if exitcode != 0 {
+		return
+	}
+
+	// Arvados Client setup
+	arv, err := arvadosclient.MakeArvadosClient()
+	if err != nil {
+		logger.Errorf("Error creating Arvados object: %s", err)
+		exitcode = 1
+		return
+	}
+
+	// Col caches per-collection totals, keyed by PDH, so repeated
+	// PDHs need not be fetched or scanned again.
+	type Col struct {
+		FileSizeTotal int64
+		FileCount     int64
+	}
+
+	blocks := make(map[string]map[string]int)
+	pdhs := make(map[string]Col)
+	var nominalSize int64
+
+	for _, input := range inputs {
+		var uuid string
+		var pdh string
+		if strings.Contains(input, ",") {
+			// The input is in the format pdh,uuid. This will allow us to save time on duplicate pdh's
+			tmp := strings.Split(input, ",")
+			pdh = tmp[0]
+			uuid = tmp[1]
+		} else {
+			// The input must be a plain uuid
+			uuid = input
+		}
+		if !strings.Contains(uuid, "-4zz18-") {
+			logger.Errorf("Error: uuid must refer to collection object")
+			exitcode = 1
+			return
+		}
+		if _, ok := pdhs[pdh]; ok {
+			// We've processed a collection with this pdh already. Simply add its
+			// size to the totals and move on to the next one.
+			// Note that we simply trust the PDH matches the collection UUID here,
+			// in other words, we use it over the UUID. If they don't match, the report
+			// will be wrong.
+			nominalSize += pdhs[pdh].FileSizeTotal
+		} else {
+			var collection arvados.Collection
+			err = arv.Get("collections", uuid, nil, &collection)
+			if err != nil {
+				logger.Errorf("Error: unable to retrieve collection: %s", err)
+				exitcode = 1
+				return
+			}
+			blocks[uuid] = blockList(collection)
+			if pdh != "" && collection.PortableDataHash != pdh {
+				logger.Errorf("Error: the collection with UUID %s has PDH %s, but a different PDH was provided in the arguments: %s", uuid, collection.PortableDataHash, pdh)
+				exitcode = 1
+				return
+			}
+			if pdh == "" {
+				pdh = collection.PortableDataHash
+			}
+
+			col := Col{}
+			if collection.FileSizeTotal != 0 || collection.FileCount != 0 {
+				nominalSize += collection.FileSizeTotal
+				col.FileSizeTotal = collection.FileSizeTotal
+				col.FileCount = int64(collection.FileCount)
+			} else {
+				// Collections created with old Arvados versions do not always have the total file size and count cached in the collections object
+				var collSize int64
+				for _, size := range blocks[uuid] {
+					collSize += int64(size)
+				}
+				nominalSize += collSize
+				col.FileSizeTotal = collSize
+			}
+			pdhs[pdh] = col
+		}
+
+		if pdhs[pdh].FileCount != 0 {
+			fmt.Fprintf(stdout, "Collection %s: pdh %s; nominal size %d (%s); file count %d\n", uuid, pdh, pdhs[pdh].FileSizeTotal, humanize.IBytes(uint64(pdhs[pdh].FileSizeTotal)), pdhs[pdh].FileCount)
+		} else {
+			fmt.Fprintf(stdout, "Collection %s: pdh %s; nominal size %d (%s)\n", uuid, pdh, pdhs[pdh].FileSizeTotal, humanize.IBytes(uint64(pdhs[pdh].FileSizeTotal)))
+		}
+	}
+
+	// Actual (deduplicated) size: count each distinct block digest
+	// once across all collections.
+	var totalSize int64
+	seen := make(map[string]bool)
+	for _, v := range blocks {
+		for digest, size := range v {
+			if !seen[digest] {
+				seen[digest] = true
+				totalSize += int64(size)
+			}
+		}
+	}
+	fmt.Fprintln(stdout)
+	fmt.Fprintf(stdout, "Collections: %15d\n", len(inputs))
+	fmt.Fprintf(stdout, "Nominal size of stored data: %15d bytes (%s)\n", nominalSize, humanize.IBytes(uint64(nominalSize)))
+	fmt.Fprintf(stdout, "Actual size of stored data: %15d bytes (%s)\n", totalSize, humanize.IBytes(uint64(totalSize)))
+	fmt.Fprintf(stdout, "Saved by Keep deduplication: %15d bytes (%s)\n", nominalSize-totalSize, humanize.IBytes(uint64(nominalSize-totalSize)))
+
+	return exitcode
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package deduplicationreport
+
+import (
+	"bytes"
+	"testing"
+
+	"git.arvados.org/arvados.git/sdk/go/arvados"
+	"git.arvados.org/arvados.git/sdk/go/arvadostest"
+	"gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate: route "go test" through gocheck.
+func Test(t *testing.T) {
+	check.TestingT(t)
+}
+
+var _ = check.Suite(&Suite{})
+
+type Suite struct{}
+
+func (s *Suite) TearDownSuite(c *check.C) {
+	// Undo any changes/additions to the database so they don't affect subsequent tests.
+	arvadostest.ResetEnv()
+}
+
+// TestUsage: running without any collection arguments is a usage
+// error (exit code 2) and prints the usage text to stderr.
+func (*Suite) TestUsage(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	exitcode := Command.RunCommand("deduplicationreport.test", []string{"-log-level=debug"}, &bytes.Buffer{}, &stdout, &stderr)
+	c.Check(exitcode, check.Equals, 2)
+	c.Check(stdout.String(), check.Equals, "")
+	c.Log(stderr.String())
+	c.Check(stderr.String(), check.Matches, `(?ms).*Usage:.*`)
+}
+
+// TestTwoIdenticalUUIDs: duplicate arguments are de-duplicated, so
+// the report counts the collection once and saves zero bytes.
+func (*Suite) TestTwoIdenticalUUIDs(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	// Run dedupreport with 2 identical uuids
+	exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.FooCollection}, &bytes.Buffer{}, &stdout, &stderr)
+	c.Check(exitcode, check.Equals, 0)
+	c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+1.*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+3 bytes \\(3 B\\).*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
+	c.Log(stderr.String())
+}
+
+// TestTwoUUIDsInvalidPDH: a pdh,uuid argument whose PDH disagrees
+// with the collection's actual PDH is reported as an error (exit 1).
+func (*Suite) TestTwoUUIDsInvalidPDH(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	// Run dedupreport with pdh,uuid where pdh does not match
+	exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooAndBarFilesInDirPDH + "," + arvadostest.FooCollection, arvadostest.FooCollection}, &bytes.Buffer{}, &stdout, &stderr)
+	c.Check(exitcode, check.Equals, 1)
+	c.Check(stdout.String(), check.Equals, "")
+	c.Log(stderr.String())
+	c.Check(stderr.String(), check.Matches, `(?ms).*Error: the collection with UUID zzzzz-4zz18-fy296fx3hot09f7 has PDH 1f4b0bc7583c2a7f9102c395f4ffc5e3\+45, but a different PDH was provided in the arguments: 870369fc72738603c2fad16664e50e2d\+58.*`)
+}
+
+// TestNonExistentCollection: a retrieval failure aborts the run with
+// exit 1 after any earlier collections were already printed.
+func (*Suite) TestNonExistentCollection(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	// Run dedupreport with many UUIDs
+	exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.NonexistentCollection}, &bytes.Buffer{}, &stdout, &stderr)
+	c.Check(exitcode, check.Equals, 1)
+	c.Check(stdout.String(), check.Equals, "Collection zzzzz-4zz18-fy296fx3hot09f7: pdh 1f4b0bc7583c2a7f9102c395f4ffc5e3+45; nominal size 3 (3 B)\n")
+	c.Log(stderr.String())
+	c.Check(stderr.String(), check.Matches, `(?ms).*Error: unable to retrieve collection:.*404 Not Found.*`)
+}
+
+// TestManyUUIDsNoOverlap: disjoint collections share no blocks, so
+// nominal and actual sizes match and nothing is saved.
+func (*Suite) TestManyUUIDsNoOverlap(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	// Run dedupreport with 5 UUIDs
+	exitcode := Command.RunCommand("deduplicationreport.test", []string{arvadostest.FooCollection, arvadostest.HelloWorldCollection, arvadostest.FooBarDirCollection, arvadostest.WazVersion1Collection, arvadostest.UserAgreementCollection}, &bytes.Buffer{}, &stdout, &stderr)
+	c.Check(exitcode, check.Equals, 0)
+	c.Check(stdout.String(), check.Matches, "(?ms).*Collections:[[:space:]]+5.*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+249049 bytes \\(243 KiB\\).*")
+	c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+0 bytes \\(0 B\\).*")
+	c.Log(stderr.String())
+	c.Check(stderr.String(), check.Equals, "")
+}
+
+// TestTwoOverlappingCollections: two freshly created collections that
+// share one 4-byte block should report 4 bytes saved, whether the
+// arguments are given as plain uuids or as pdh,uuid pairs.
+func (*Suite) TestTwoOverlappingCollections(c *check.C) {
+	var stdout, stderr bytes.Buffer
+	// Create two collections
+	arv := arvados.NewClientFromEnv()
+
+	var c1 arvados.Collection
+	err := arv.RequestAndDecode(&c1, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". d3b07384d113edec49eaa6238ad5ff00+4 0:4:foo\n"}})
+	c.Assert(err, check.Equals, nil)
+
+	var c2 arvados.Collection
+	err = arv.RequestAndDecode(&c2, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{"manifest_text": ". c157a79031e1c40f85931829bc5fc552+4 d3b07384d113edec49eaa6238ad5ff00+4 0:4:bar 4:4:foo\n"}})
+	c.Assert(err, check.Equals, nil)
+
+	for _, trial := range []struct {
+		field1 string
+		field2 string
+	}{
+		{
+			// Run dedupreport with 2 arguments: uuid uuid
+			field1: c1.UUID,
+			field2: c2.UUID,
+		},
+		{
+			// Run dedupreport with 2 arguments: pdh,uuid uuid
+			field1: c1.PortableDataHash + "," + c1.UUID,
+			field2: c2.UUID,
+		},
+	} {
+		// NOTE(review): stdout/stderr are shared across both trials
+		// without a Reset(), so the second trial's checks run against
+		// accumulated output; the regex matches still pass, but
+		// resetting per trial would make failures easier to attribute.
+		exitcode := Command.RunCommand("deduplicationreport.test", []string{trial.field1, trial.field2}, &bytes.Buffer{}, &stdout, &stderr)
+		c.Check(exitcode, check.Equals, 0)
+		c.Check(stdout.String(), check.Matches, "(?ms).*Nominal size of stored data:[[:space:]]+12 bytes \\(12 B\\).*")
+		c.Check(stdout.String(), check.Matches, "(?ms).*Actual size of stored data:[[:space:]]+8 bytes \\(8 B\\).*")
+		c.Check(stdout.String(), check.Matches, "(?ms).*Saved by Keep deduplication:[[:space:]]+4 bytes \\(4 B\\).*")
+		c.Log(stderr.String())
+		c.Check(stderr.String(), check.Equals, "")
+	}
+}
parser.add_argument("--enable-dev", action="store_true",
help="Enable loading and running development versions "
- "of CWL spec.", default=False)
+ "of the CWL standards.", default=False)
parser.add_argument('--storage-classes', default="default",
help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
parser.add_argument("--http-timeout", type=int,
default=5*60, dest="http_timeout", help="API request timeout in seconds. Default is 300 seconds (5 minutes).")
+ parser.add_argument(
+ "--skip-schemas",
+ action="store_true",
+ help="Skip loading of schemas",
+ default=False,
+ dest="skip_schemas",
+ )
+
exgroup = parser.add_mutually_exclusive_group()
exgroup.add_argument("--trash-intermediate", action="store_true",
default=False, dest="trash_intermediate",
with Perf(metrics, "createfiles %s" % self.name):
for f, p in sorteditems:
if not p.target:
- pass
- elif p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
+ continue
+
+ if p.target.startswith("/"):
+ dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
+ else:
+ dst = p.target
+
+ if p.type in ("File", "Directory", "WritableFile", "WritableDirectory"):
if p.resolved.startswith("_:"):
- vwd.mkdirs(p.target)
+ vwd.mkdirs(dst)
else:
source, path = self.arvrunner.fs_access.get_collection(p.resolved)
- vwd.copy(path or ".", p.target, source_collection=source)
+ vwd.copy(path or ".", dst, source_collection=source)
elif p.type == "CreateFile":
if self.arvrunner.secret_store.has_secret(p.resolved):
- secret_mounts["%s/%s" % (self.outdir, p.target)] = {
+ mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
+ secret_mounts[mountpoint] = {
"kind": "text",
"content": self.arvrunner.secret_store.retrieve(p.resolved)
}
else:
- with vwd.open(p.target, "w") as n:
+ with vwd.open(dst, "w") as n:
n.write(p.resolved)
def keepemptydirs(p):
if (not p.target or self.arvrunner.secret_store.has_secret(p.resolved) or
(prev is not None and p.target.startswith(prev))):
continue
- mountpoint = "%s/%s" % (self.outdir, p.target)
+ if p.target.startswith("/"):
+ dst = p.target[len(self.outdir)+1:] if p.target.startswith(self.outdir+"/") else p.target[1:]
+ else:
+ dst = p.target
+ mountpoint = p.target if p.target.startswith("/") else os.path.join(self.outdir, p.target)
mounts[mountpoint] = {"kind": "collection",
"portable_data_hash": vwd.portable_data_hash(),
- "path": p.target}
+ "path": dst}
if p.type.startswith("Writable"):
mounts[mountpoint]["writable"] = True
prev = p.target + "/"
logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
except Exception:
logger.exception("%s got an error", self.arvrunner.label(self))
+ logger.debug("Container request was %s", container_request)
self.output_callback({}, "permanentFail")
def done(self, record):
"/keep/%s/%s")
def job(self, joborder, output_callback, runtimeContext):
- builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
+ builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
if runtimeContext.work_api == "containers":
runtimeContext = runtimeContext.copy()
runtimeContext.toplevel = True # Preserve behavior for #13365
- builder = make_builder({shortname(k): v for k,v in viewitems(joborder)}, self.hints, self.requirements, runtimeContext)
+ builder = make_builder({shortname(k): v for k,v in viewitems(joborder)}, self.hints, self.requirements,
+ runtimeContext, self.metadata)
runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
return super(ArvadosWorkflowStep, self).job(joborder, output_callback, runtimeContext)
def job(self, joborder, output_callback, runtimeContext):
- builder = make_builder(joborder, self.hints, self.requirements, runtimeContext)
+ builder = make_builder(joborder, self.hints, self.requirements, runtimeContext, self.metadata)
runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
}).execute(num_retries=self.num_retries)
except Exception:
logger.exception("Setting container output")
- return
+ raise
def apply_reqs(self, job_order_object, tool):
if "https://w3id.org/cwl/cwl#requirements" in job_order_object:
return False
else:
raise
+ except IOError as err:
+ if err.errno == errno.ENOENT:
+ return False
+ else:
+ raise
if collection is not None:
if rest:
return collection.exists(rest)
import urllib.parse
import logging
import calendar
+import urllib.parse
logger = logging.getLogger('arvados.cwl-runner')
logger.info("%d downloaded, %3.2f MiB/s", count, (bps / (1024*1024)))
checkpoint = loopnow
- c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid, ensure_unique_name=True)
+
+ collectionname = "Downloaded from %s" % urllib.parse.quote(url, safe='')
+ c.save_new(name=collectionname, owner_uuid=project_uuid, ensure_unique_name=True)
api.collections().update(uuid=c.manifest_locator(), body={"collection":{"properties": properties}}).execute()
def visit(self, obj, stagedir, basedir, copy=False, staged=False):
# type: (Dict[unicode, Any], unicode, unicode, bool) -> None
loc = obj["location"]
+ stagedir = obj.get("dirname") or stagedir
tgt = os.path.join(stagedir, obj["basename"])
basetgt, baseext = os.path.splitext(tgt)
for i in viewvalues(d):
find_defaults(i, op)
-def make_builder(joborder, hints, requirements, runtimeContext):
+def make_builder(joborder, hints, requirements, runtimeContext, metadata):
return Builder(
job=joborder,
files=[], # type: List[Dict[Text, Text]]
outdir="", # type: Text
tmpdir="", # type: Text
stagedir="", # type: Text
+ cwlVersion=metadata.get("http://commonwl.org/cwltool#original_cwlVersion") or metadata.get("cwlVersion")
)
def search_schemadef(name, reqs):
specs = []
primary["secondaryFiles"] = secondaryspec
for i, sf in enumerate(aslist(secondaryspec)):
- pattern = builder.do_eval(sf["pattern"], context=primary)
+ if builder.cwlVersion == "v1.0":
+ pattern = builder.do_eval(sf, context=primary)
+ else:
+ pattern = builder.do_eval(sf["pattern"], context=primary)
if pattern is None:
continue
if isinstance(pattern, list):
# that external references in $include and $mixin are captured.
scanobj = loadref("", workflowobj["id"])
+ metadata = scanobj
+
sc_result = scandeps(uri, scanobj,
loadref_fields,
set(("$include", "$schemas", "location")),
builder = make_builder(builder_job_order,
obj.get("hints", []),
obj.get("requirements", []),
- ArvRuntimeContext())
+ ArvRuntimeContext(),
+ metadata)
discover_secondary_files(arvrunner.fs_access,
builder,
obj["inputs"],
builder = make_builder(builder_job_order,
tool.hints,
tool.requirements,
- ArvRuntimeContext())
+ ArvRuntimeContext(),
+ tool.metadata)
# Now update job_order with secondaryFiles
discover_secondary_files(arvrunner.fs_access,
builder,
# file to determine what version of cwltool and schema-salad to
# build.
install_requires=[
- 'cwltool==3.0.20200530110633',
- 'schema-salad==6.0.20200601095207',
+ 'cwltool==3.0.20200720165847',
+ 'schema-salad==7.0.20200612160654',
'arvados-python-client{}'.format(pysdk_dep),
'setuptools',
'ciso8601 >= 2.0.0'
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
cwlVersion: v1.0
class: CommandLineTool
requirements:
- class: InlineJavascriptRequirement
+ - class: ShellCommandRequirement
arguments:
+ - cd
+ - $(inputs.hello.dirname)
+ - {shellQuote: false, valueFrom: "&&"}
- ls
- - -l
- - $(inputs.hello)
+stdout: hello.out
inputs:
hello:
type: File
location: keep:4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt
secondaryFiles:
- .idx
-outputs: []
\ No newline at end of file
+outputs:
+ out:
+ type: File
+ outputBinding:
+ glob: hello.out
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+ - class: InlineJavascriptRequirement
+ - class: ShellCommandRequirement
+arguments:
+ - cd
+ - $(inputs.hello.dirname)
+ - {shellQuote: false, valueFrom: "&&"}
+ - ls
+stdout: hello.out
+inputs:
+ hello:
+ type: File
+ default:
+ class: File
+ location: keep:ffffffffffffffffffffffffffaaaaaa+69/hello.txt
+ secondaryFiles:
+ - .idx
+outputs:
+ out:
+ type: File
+ outputBinding:
+ glob: hello.out
- job: null
output:
- out: null
+ "out": {
+ "location": "hello.out",
+ "class": "File",
+ "checksum": "sha1$ec5d3976351abab45a483a49ce714a8430cb203a",
+ "size": 24
+ }
tool: 13976-keepref-wf.cwl
doc: "Test issue 13976"
}
tool: 16169-no-listing-hint.cwl
doc: "Test cwltool:LoadListingRequirement propagation"
+
+- job: hello.yml
+ output:
+ "out": {
+ "location": "hello.out",
+ "class": "File",
+ "checksum": "sha1$ec5d3976351abab45a483a49ce714a8430cb203a",
+ "size": 24
+ }
+ tool: 16377-missing-default.cwl
+ doc: "Test issue 16377 - missing default fails even when it should be overridden by valid input"
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+hello:
+ class: File
+ location: keep:4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt
getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
cm.open.assert_called_with("file1.txt", "wb")
- cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
+ cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
owner_uuid=None, ensure_unique_name=True)
api.collections().update.assert_has_calls([
getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
cm.open.assert_called_with("file1.txt", "wb")
- cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
+ cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Ffile1.txt",
owner_uuid=None, ensure_unique_name=True)
api.collections().update.assert_has_calls([
getmock.assert_called_with("http://example.com/download?fn=/file1.txt", stream=True, allow_redirects=True)
cm.open.assert_called_with("file1.txt", "wb")
- cm.save_new.assert_called_with(name="Downloaded from http://example.com/download?fn=/file1.txt",
+ cm.save_new.assert_called_with(name="Downloaded from http%3A%2F%2Fexample.com%2Fdownload%3Ffn%3D%2Ffile1.txt",
owner_uuid=None, ensure_unique_name=True)
api.collections().update.assert_has_calls([
@mock.patch("arvados_cwl.task_queue.TaskQueue")
@mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
- @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection", return_value = (None, None))
+ @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
@stubs
def test_storage_classes_correctly_propagate_to_make_output_collection(self, stubs, make_output, job, tq):
+ final_output_c = arvados.collection.Collection()
+ make_output.return_value = ({},final_output_c)
+
def set_final_output(job_order, output_callback, runtimeContext):
output_callback("zzzzz-4zz18-zzzzzzzzzzzzzzzz", "success")
return []
exited = arvados_cwl.main(
["--debug", "--local", "--storage-classes=foo",
"tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
- sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
make_output.assert_called_with(u'Output of submit_wf.cwl', ['foo'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
self.assertEqual(exited, 0)
@mock.patch("arvados_cwl.task_queue.TaskQueue")
@mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
- @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection", return_value = (None, None))
+ @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
@stubs
def test_default_storage_classes_correctly_propagate_to_make_output_collection(self, stubs, make_output, job, tq):
+ final_output_c = arvados.collection.Collection()
+ make_output.return_value = ({},final_output_c)
+
def set_final_output(job_order, output_callback, runtimeContext):
output_callback("zzzzz-4zz18-zzzzzzzzzzzzzzzz", "success")
return []
exited = arvados_cwl.main(
["--debug", "--local",
"tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
- sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
make_output.assert_called_with(u'Output of submit_wf.cwl', ['default'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
self.assertEqual(exited, 0)
"outputs": [
{
"id": "#secret_job.cwl/out",
- "type": "stdout"
+ "type": "File",
+ "outputBinding": {
+ "glob": "hashed_example.txt"
+ }
}
],
"stdout": "hashed_example.txt",
stubs.capture_stdout, capture_stderr, api_client=stubs.api, keep_client=stubs.keep_client)
self.assertEqual(exited, 1)
- self.assertRegexpMatches(
+ self.assertRegex(
re.sub(r'[ \n]+', ' ', capture_stderr.getvalue()),
r"Expected collection uuid zzzzz-4zz18-zzzzzzzzzzzzzzz to be 99999999999999999999999999999998\+99 but API server reported 99999999999999999999999999999997\+99")
finally:
try:
self.assertEqual(exited, 1)
- self.assertRegexpMatches(
+ self.assertRegex(
capture_stderr.getvalue(),
r"Collection uuid zzzzz-4zz18-zzzzzzzzzzzzzzz not found")
finally:
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvadostest
+
+import (
+ "context"
+
+ "git.arvados.org/arvados.git/lib/ctrlctx"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/jmoiron/sqlx"
+ _ "github.com/lib/pq"
+ "gopkg.in/check.v1"
+)
+
+// DB returns a DB connection for the given cluster config.
+func DB(c *check.C, cluster *arvados.Cluster) *sqlx.DB {
+ db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+ c.Assert(err, check.IsNil)
+ return db
+}
+
+// TransactionContext returns a context suitable for running a test
+// case in a new transaction, and a rollback func which the caller
+// should call after the test.
+func TransactionContext(c *check.C, db *sqlx.DB) (ctx context.Context, rollback func()) {
+ tx, err := db.Beginx()
+ c.Assert(err, check.IsNil)
+ return ctrlctx.NewWithTransaction(context.Background(), tx), func() {
+ c.Check(tx.Rollback(), check.IsNil)
+ }
+}
import urllib.parse
import os
import hashlib
+import re
from arvados._version import __version__
EMAIL=0
def update_username(args, email, user_uuid, username, migratecluster, migratearv):
print("(%s) Updating username of %s to '%s' on %s" % (email, user_uuid, username, migratecluster))
- if not args.dry_run:
- try:
- conflicts = migratearv.users().list(filters=[["username", "=", username]], bypass_federation=True).execute()
- if conflicts["items"]:
- # There's already a user with the username, move the old user out of the way
- migratearv.users().update(uuid=conflicts["items"][0]["uuid"],
- bypass_federation=True,
- body={"user": {"username": username+"migrate"}}).execute()
- migratearv.users().update(uuid=user_uuid,
- bypass_federation=True,
- body={"user": {"username": username}}).execute()
- except arvados.errors.ApiError as e:
- print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e))
+ if args.dry_run:
+ return
+ try:
+ conflicts = migratearv.users().list(filters=[["username", "=", username]], bypass_federation=True).execute()
+ if conflicts["items"]:
+ # There's already a user with the username, move the old user out of the way
+ migratearv.users().update(uuid=conflicts["items"][0]["uuid"],
+ bypass_federation=True,
+ body={"user": {"username": username+"migrate"}}).execute()
+ migratearv.users().update(uuid=user_uuid,
+ bypass_federation=True,
+ body={"user": {"username": username}}).execute()
+ except arvados.errors.ApiError as e:
+ print("(%s) Error updating username of %s to '%s' on %s: %s" % (email, user_uuid, username, migratecluster, e))
def choose_new_user(args, by_email, email, userhome, username, old_user_uuid, clusters):
conflicts = homearv.users().list(filters=[["username", "=", username]],
bypass_federation=True).execute()
if conflicts["items"]:
- homearv.users().update(uuid=conflicts["items"][0]["uuid"],
- bypass_federation=True,
- body={"user": {"username": username+"migrate"}}).execute()
- user = homearv.users().create(body={"user": {"email": email, "username": username,
- "is_active": olduser["is_active"]}}).execute()
+ homearv.users().update(
+ uuid=conflicts["items"][0]["uuid"],
+ bypass_federation=True,
+ body={"user": {"username": username+"migrate"}}).execute()
+ user = homearv.users().create(
+ body={"user": {
+ "email": email,
+ "first_name": olduser["first_name"],
+ "last_name": olduser["last_name"],
+ "username": username,
+ "is_active": olduser["is_active"]}}).execute()
except arvados.errors.ApiError as e:
print("(%s) Could not create user: %s" % (email, str(e)))
return None
newuser = arvados.api(host=ru.netloc, token=salted,
insecure=os.environ.get("ARVADOS_API_HOST_INSECURE")).users().current().execute()
else:
- newuser = {"is_active": True, "username": username}
+ newuser = {"is_active": True, "username": email.split('@')[0], "is_admin": False}
except arvados.errors.ApiError as e:
print("(%s) Error getting user info for %s from %s: %s" % (email, new_user_uuid, migratecluster, e))
return None
return None
if olduser["is_admin"] and not newuser["is_admin"]:
- print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s" % (email, old_user_uuid, new_user_uuid, migratecluster))
+ print("(%s) Not migrating %s because user is admin but target user %s is not admin on %s. Please ensure the user admin status is the same on both clusters. Note that a federated admin account has admin privileges on the entire federation." % (email, old_user_uuid, new_user_uuid, migratecluster))
return None
return newuser
def migrate_user(args, migratearv, email, new_user_uuid, old_user_uuid):
+ if args.dry_run:
+ return
try:
- if not args.dry_run:
+ new_owner_uuid = new_user_uuid
+ if args.data_into_subproject:
grp = migratearv.groups().create(body={
"owner_uuid": new_user_uuid,
"name": "Migrated from %s (%s)" % (email, old_user_uuid),
"group_class": "project"
}, ensure_unique_name=True).execute()
- migratearv.users().merge(old_user_uuid=old_user_uuid,
- new_user_uuid=new_user_uuid,
- new_owner_uuid=grp["uuid"],
- redirect_to_new_user=True).execute()
+ new_owner_uuid = grp["uuid"]
+ migratearv.users().merge(old_user_uuid=old_user_uuid,
+ new_user_uuid=new_user_uuid,
+ new_owner_uuid=new_owner_uuid,
+ redirect_to_new_user=True).execute()
except arvados.errors.ApiError as e:
- print("(%s) Error migrating user: %s" % (email, e))
+ name_collision = re.search(r'Key \(owner_uuid, name\)=\((.*?), (.*?)\) already exists\.\n.*UPDATE "(.*?)"', e._get_reason())
+ if name_collision:
+ target_owner, rsc_name, rsc_type = name_collision.groups()
+ print("(%s) Cannot migrate to %s because both origin and target users have a %s named '%s'. Please rename the conflicting items or use --data-into-subproject to migrate all users' data into a special subproject." % (email, target_owner, rsc_type[:-1], rsc_name))
+ else:
+ print("(%s) Skipping user migration because of error: %s" % (email, e))
def main():
-
parser = argparse.ArgumentParser(description='Migrate users to federated identity, see https://doc.arvados.org/admin/merge-remote-account.html')
parser.add_argument(
'--version', action='version', version="%s %s" % (sys.argv[0], __version__),
help='Print version and exit.')
- parser.add_argument('--tokens', type=str, required=False)
+ parser.add_argument('--tokens', type=str, metavar='FILE', required=False, help="Read tokens from FILE. Not needed when using LoginCluster.")
+ parser.add_argument('--data-into-subproject', action="store_true", help="Migrate user's data into a separate subproject. This can be used to avoid name collisions from within an account.")
group = parser.add_mutually_exclusive_group(required=True)
- group.add_argument('--report', type=str, help="Generate report .csv file listing users by email address and their associated Arvados accounts")
- group.add_argument('--migrate', type=str, help="Consume report .csv and migrate users to designated Arvados accounts")
- group.add_argument('--dry-run', type=str, help="Consume report .csv and report how user would be migrated to designated Arvados accounts")
- group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected")
+ group.add_argument('--report', type=str, metavar='FILE', help="Generate report .csv file listing users by email address and their associated Arvados accounts.")
+ group.add_argument('--migrate', type=str, metavar='FILE', help="Consume report .csv and migrate users to designated Arvados accounts.")
+ group.add_argument('--dry-run', type=str, metavar='FILE', help="Consume report .csv and report how user would be migrated to designated Arvados accounts.")
+ group.add_argument('--check', action="store_true", help="Check that tokens are usable and the federation is well connected.")
args = parser.parse_args()
clusters, errors, loginCluster = connect_clusters(args)
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
import arvados
import json
import sys
for i in range(1, 10):
found = False
for u in users["items"]:
- if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i):
+ if u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser":
found = True
by_username[u["username"]] = u["uuid"]
assert found
found = False
for u in users["items"]:
if (u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and
+ u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser" and
u["uuid"] == by_username[u["username"]] and u["is_active"] is True):
found = True
assert found, "Not found case%i" % i
found = False
for u in users["items"]:
if (u["username"] == "case9" and u["email"] == "case9@test" and
+ u["first_name"] == "Case9" and u["last_name"] == "Testuser" and
u["uuid"] == by_username[u["username"]] and u["is_active"] is False):
found = True
assert found
found = False
for u in users["items"]:
if (u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and
+ u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser" and
u["uuid"] == by_username[u["username"]] and u["is_active"] is True):
found = True
assert found
found = False
for u in users["items"]:
if (u["username"] == ("case%d" % i) and u["email"] == ("case%d@test" % i) and
+ u["first_name"] == ("Case%d" % i) and u["last_name"] == "Testuser" and
u["uuid"] == by_username[u["username"]] and u["is_active"] is True):
found = True
assert not found
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
import arvados
import json
import sys
def maketoken(newtok):
return 'v2/' + newtok["uuid"] + '/' + newtok["api_token"]
+def get_user_data(case_nr, is_active=True):
+ return {
+ "email": "case{}@test".format(case_nr),
+ "first_name": "Case{}".format(case_nr),
+ "last_name": "Testuser",
+ "is_active": is_active
+ }
+
# case 1
# user only exists on cluster A
-apiA.users().create(body={"user": {"email": "case1@test", "is_active": True}}).execute()
+apiA.users().create(body={"user": get_user_data(case_nr=1)}).execute()
# case 2
# user exists on cluster A and has remotes on B and C
-case2 = apiA.users().create(body={"user": {"email": "case2@test", "is_active": True}}).execute()
+case2 = apiA.users().create(body={"user": get_user_data(case_nr=2)}).execute()
newtok = apiA.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case2["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtok), insecure=True).users().current().execute()
# case 3
# user only exists on cluster B
-case3 = apiB.users().create(body={"user": {"email": "case3@test", "is_active": True}}).execute()
+case3 = apiB.users().create(body={"user": get_user_data(case_nr=3)}).execute()
# case 4
# user only exists on cluster B and has remotes on A and C
-case4 = apiB.users().create(body={"user": {"email": "case4@test", "is_active": True}}).execute()
+case4 = apiB.users().create(body={"user": get_user_data(case_nr=4)}).execute()
newtok = apiB.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case4["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtok), insecure=True).users().current().execute()
# case 5
# user exists on both cluster A and B
-case5 = apiA.users().create(body={"user": {"email": "case5@test", "is_active": True}}).execute()
-case5 = apiB.users().create(body={"user": {"email": "case5@test", "is_active": True}}).execute()
+case5 = apiA.users().create(body={"user": get_user_data(case_nr=5)}).execute()
+case5 = apiB.users().create(body={"user": get_user_data(case_nr=5)}).execute()
# case 6
# user exists on both cluster A and B, with remotes on A, B and C
-case6_A = apiA.users().create(body={"user": {"email": "case6@test", "is_active": True}}).execute()
+case6_A = apiA.users().create(body={"user": get_user_data(case_nr=6)}).execute()
newtokA = apiA.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case6_A["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokA), insecure=True).users().current().execute()
arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokA), insecure=True).users().current().execute()
-case6_B = apiB.users().create(body={"user": {"email": "case6@test", "is_active": True}}).execute()
+case6_B = apiB.users().create(body={"user": get_user_data(case_nr=6)}).execute()
newtokB = apiB.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case6_B["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
# case 7
# user exists on both cluster B and A, with remotes on A, B and C
-case7_B = apiB.users().create(body={"user": {"email": "case7@test", "is_active": True}}).execute()
+case7_B = apiB.users().create(body={"user": get_user_data(case_nr=7)}).execute()
newtokB = apiB.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case7_B["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
-case7_A = apiA.users().create(body={"user": {"email": "case7@test", "is_active": True}}).execute()
+case7_A = apiA.users().create(body={"user": get_user_data(case_nr=7)}).execute()
newtokA = apiA.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case7_A["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][1], token=maketoken(newtokA), insecure=True).users().current().execute()
# case 8
# user exists on both cluster B and C, with remotes on A, B and C
-case8_B = apiB.users().create(body={"user": {"email": "case8@test", "is_active": True}}).execute()
+case8_B = apiB.users().create(body={"user": get_user_data(case_nr=8)}).execute()
newtokB = apiB.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case8_B["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokB), insecure=True).users().current().execute()
arvados.api(host=j["arvados_api_hosts"][2], token=maketoken(newtokB), insecure=True).users().current().execute()
-case8_C = apiC.users().create(body={"user": {"email": "case8@test", "is_active": True}}).execute()
+case8_C = apiC.users().create(body={"user": get_user_data(case_nr=8)}).execute()
newtokC = apiC.api_client_authorizations().create(body={
"api_client_authorization": {'owner_uuid': case8_C["uuid"]}}).execute()
arvados.api(host=j["arvados_api_hosts"][0], token=maketoken(newtokC), insecure=True).users().current().execute()
# case 9
# user only exists on cluster B, but is inactive
-case9 = apiB.users().create(body={"user": {"email": "case9@test", "is_active": False}}).execute()
+case9 = apiB.users().create(body={"user": get_user_data(case_nr=9, is_active=False)}).execute()
localip=$(ip addr show $defaultdev | grep 'inet ' | sed 's/ *inet \(.*\)\/.*/\1/')
fi
echo "Public arvbox will use address $localip"
- iptemp=$(tempfile)
+ iptemp=$(mktemp)
echo $localip > $iptemp
chmod og+r $iptemp
PUBLIC="--volume=$iptemp:/var/run/localip_override