16526: Merge branch 'master' into 16526-ruby-and-python-build-script-updates
author Ward Vandewege <ward@curii.com>
Fri, 19 Jun 2020 15:31:54 +0000 (11:31 -0400)
committer Ward Vandewege <ward@curii.com>
Fri, 19 Jun 2020 15:32:53 +0000 (11:32 -0400)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

38 files changed:
cmd/arvados-server/cmd.go
doc/_config.yml
doc/admin/recovering-deleted-collections.html.textile.liquid [new file with mode: 0644]
doc/install/setup-login.html.textile.liquid
lib/config/config.default.yml
lib/config/export.go
lib/config/generated_config.go
lib/controller/localdb/login.go
lib/controller/localdb/login_oidc.go
lib/controller/localdb/login_oidc_test.go
lib/recovercollection/cmd.go [moved from lib/undelete/cmd.go with 62% similarity]
lib/recovercollection/cmd_test.go [moved from lib/undelete/cmd_test.go with 76% similarity]
sdk/go/arvados/config.go
services/api/app/controllers/database_controller.rb
services/api/app/models/arvados_model.rb
services/api/app/models/database_seeds.rb
services/api/app/models/group.rb
services/api/app/models/link.rb
services/api/app/models/materialized_permission.rb [new file with mode: 0644]
services/api/app/models/trashed_group.rb [new file with mode: 0644]
services/api/app/models/user.rb
services/api/db/migrate/20200501150153_permission_table.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/20200501150153_permission_table_constants.rb [new file with mode: 0644]
services/api/lib/refresh_permission_view.rb [deleted file]
services/api/lib/update_permissions.rb [new file with mode: 0644]
services/api/test/fixtures/groups.yml
services/api/test/fixtures/users.yml
services/api/test/functional/arvados/v1/groups_controller_test.rb
services/api/test/integration/groups_test.rb
services/api/test/performance/permission_test.rb
services/api/test/test_helper.rb
services/api/test/unit/collection_test.rb
services/api/test/unit/owner_test.rb
services/api/test/unit/permission_test.rb
services/api/test/unit/user_test.rb
services/keepstore/unix_volume.go
services/keepstore/unix_volume_test.go

index 1b2de11accefe995511194c5941f25af3ccd35e4..ff99de75c41ad13f630d0902c2e695c6c17ad5c9 100644 (file)
@@ -15,7 +15,7 @@ import (
        "git.arvados.org/arvados.git/lib/crunchrun"
        "git.arvados.org/arvados.git/lib/dispatchcloud"
        "git.arvados.org/arvados.git/lib/install"
-       "git.arvados.org/arvados.git/lib/undelete"
+       "git.arvados.org/arvados.git/lib/recovercollection"
        "git.arvados.org/arvados.git/services/ws"
 )
 
@@ -25,17 +25,17 @@ var (
                "-version":  cmd.Version,
                "--version": cmd.Version,
 
-               "boot":            boot.Command,
-               "cloudtest":       cloudtest.Command,
-               "config-check":    config.CheckCommand,
-               "config-defaults": config.DumpDefaultsCommand,
-               "config-dump":     config.DumpCommand,
-               "controller":      controller.Command,
-               "crunch-run":      crunchrun.Command,
-               "dispatch-cloud":  dispatchcloud.Command,
-               "install":         install.Command,
-               "undelete":        undelete.Command,
-               "ws":              ws.Command,
+               "boot":               boot.Command,
+               "cloudtest":          cloudtest.Command,
+               "config-check":       config.CheckCommand,
+               "config-defaults":    config.DumpDefaultsCommand,
+               "config-dump":        config.DumpCommand,
+               "controller":         controller.Command,
+               "crunch-run":         crunchrun.Command,
+               "dispatch-cloud":     dispatchcloud.Command,
+               "install":            install.Command,
+               "recover-collection": recovercollection.Command,
+               "ws":                 ws.Command,
        })
 )
 
index 48fe1b53d49149139cf36e0b602e0f0fe4d2ab3e..3b59cbca45205983ba4b83429f06b914946a53dd 100644 (file)
@@ -174,6 +174,7 @@ navbar:
       - admin/logs-table-management.html.textile.liquid
       - admin/workbench2-vocabulary.html.textile.liquid
       - admin/storage-classes.html.textile.liquid
+      - admin/recovering-deleted-collections.html.textile.liquid
     - Cloud:
       - admin/spot-instances.html.textile.liquid
       - admin/cloudtest.html.textile.liquid
diff --git a/doc/admin/recovering-deleted-collections.html.textile.liquid b/doc/admin/recovering-deleted-collections.html.textile.liquid
new file mode 100644 (file)
index 0000000..59c576c
--- /dev/null
@@ -0,0 +1,37 @@
+---
+layout: default
+navsection: admin
+title: Recovering deleted collections
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+In some cases, it is possible to recover files that were lost when a collection was modified or deleted.
+
+Whether recovery is possible depends on many factors, including:
+* Whether the collection manifest is still available, e.g., in an audit log entry
+* Whether the data blocks are also referenced by other collections
+* Whether the data blocks have been unreferenced long enough to be marked for deletion/trash by keep-balance
+* Blob signature TTL, trash lifetime, trash check interval, and other config settings
+
+To attempt recovery of a previous version of a deleted/modified collection, use the @arvados-server recover-collection@ command. It should be run on one of your server nodes where the @arvados-server@ package is installed and the @/etc/arvados/config.yml@ file is up to date.
+
+Specify the collection you want to recover by passing the UUID of an audit log entry, the UUID of the deleted/modified collection, or the path to a local file containing the manifest.
+
+If recovery is successful, the @recover-collection@ program saves the recovered data in a new collection belonging to the system user, and prints the new collection's UUID on stdout.
+
+<pre>
+# arvados-server recover-collection 9tee4-57u5n-nb5awmk1pahac2t
+INFO[2020-06-05T19:52:29.557761245Z] loaded log entry                              logged_event_time="2020-06-05 16:48:01.438791 +0000 UTC" logged_event_type=update old_collection_uuid=9tee4-4zz18-1ex26g95epmgw5w src=9tee4-57u5n-nb5awmk1pahac2t
+INFO[2020-06-05T19:52:29.642145127Z] recovery succeeded                            UUID=9tee4-4zz18-5trfp4k4xxg97f1 src=9tee4-57u5n-nb5awmk1pahac2t
+9tee4-4zz18-5trfp4k4xxg97f1
+INFO[2020-06-05T19:52:29.644699436Z] exiting
+</pre>
+
+In this example, the original data has been restored and saved in a new collection with UUID @9tee4-4zz18-5trfp4k4xxg97f1@.
+
+For more options, run @arvados-server recover-collection -help@.
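
The same command accepts a manifest saved to a local file; a hypothetical invocation (the path below is a placeholder) would be:

<pre>
# arvados-server recover-collection /tmp/recovered-manifest.txt
</pre>

As with the audit-log example above, the UUID of the newly created collection is printed on stdout when recovery succeeds.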
index 572a83f7060ab925b8c4e39b45186bf898f893dc..aec82cfe2a583dd2eaf2d251532e4d46d625ff5e 100644 (file)
@@ -56,6 +56,8 @@ With this configuration, users will sign in with a third-party OpenID Connect pr
         ClientSecret: "zzzzzzzzzzzzzzzzzzzzzzzz"
 </pre>
 
+Check the OpenIDConnect section in the "default config file":{{site.baseurl}}/admin/config.html for more details and configuration options.
+
 h2(#ldap). LDAP
 
 With this configuration, authentication uses an external LDAP service like OpenLDAP or Active Directory.
index 219f6ef0ba91a1afb2e3311ca66b94f5a989020f..b9bc9c2c5ce4a28eb25015961b687cea449d503a 100644 (file)
@@ -569,6 +569,22 @@ Clusters:
         ClientID: ""
         ClientSecret: ""
 
+        # OpenID claim field containing the user's email
+        # address. Normally "email"; see
+        # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims
+        EmailClaim: "email"
+
+        # OpenID claim field containing the email verification
+        # flag. Normally "email_verified".  To accept every returned
+        # email address without checking a "verified" field at all,
+        # use the empty string "".
+        EmailVerifiedClaim: "email_verified"
+
+        # OpenID claim field containing the user's preferred
+        # username. If empty, use the mailbox part of the user's email
+        # address.
+        UsernameClaim: ""
+
       PAM:
         # (Experimental) Use PAM to authenticate users.
         Enable: false
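
Putting the new claim settings together, a cluster whose identity provider uses non-standard claim names might configure the OpenID Connect login section along these lines (an illustrative sketch; the cluster ID, issuer, client credentials, and claim names are placeholders, and the nesting under Clusters/Login follows the default config excerpt above):

<pre>
Clusters:
  zzzzz:
    Login:
      OpenIDConnect:
        Enable: true
        Issuer: "https://oidc.example.com"
        ClientID: "arvados-client-id"
        ClientSecret: "arvados-client-secret"
        EmailClaim: "alt_email"
        EmailVerifiedClaim: "alt_verified"
        UsernameClaim: "alt_username"
</pre>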
index fc4908c15929d7807d4c63033d61cbcef3b4bcab..0ad4222f551ba1220d2459f4330e9a1e05240d44 100644 (file)
@@ -156,6 +156,9 @@ var whitelist = map[string]bool{
        "Login.OpenIDConnect.ClientSecret":             false,
        "Login.OpenIDConnect.Enable":                   true,
        "Login.OpenIDConnect.Issuer":                   false,
+       "Login.OpenIDConnect.EmailClaim":               false,
+       "Login.OpenIDConnect.EmailVerifiedClaim":       false,
+       "Login.OpenIDConnect.UsernameClaim":            false,
        "Login.PAM":                                    true,
        "Login.PAM.DefaultEmailDomain":                 false,
        "Login.PAM.Enable":                             true,
index 6f8cab462bce2dc15118f31454b40bb35d06e3ff..758dc2677cf233b0d4d61462e7ec73d607f69174 100644 (file)
@@ -575,6 +575,22 @@ Clusters:
         ClientID: ""
         ClientSecret: ""
 
+        # OpenID claim field containing the user's email
+        # address. Normally "email"; see
+        # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims
+        EmailClaim: "email"
+
+        # OpenID claim field containing the email verification
+        # flag. Normally "email_verified".  To accept every returned
+        # email address without checking a "verified" field at all,
+        # use the empty string "".
+        EmailVerifiedClaim: "email_verified"
+
+        # OpenID claim field containing the user's preferred
+        # username. If empty, use the mailbox part of the user's email
+        # address.
+        UsernameClaim: ""
+
       PAM:
         # (Experimental) Use PAM to authenticate users.
         Enable: false
index 9a0ee746e64006d08ab2d87981f6f47bf8fbcfa6..905cfed15c500d95689857e36c1c3323165c4d3d 100644 (file)
@@ -37,14 +37,19 @@ func chooseLoginController(cluster *arvados.Cluster, railsProxy *railsProxy) log
                        ClientID:           cluster.Login.Google.ClientID,
                        ClientSecret:       cluster.Login.Google.ClientSecret,
                        UseGooglePeopleAPI: cluster.Login.Google.AlternateEmailAddresses,
+                       EmailClaim:         "email",
+                       EmailVerifiedClaim: "email_verified",
                }
        case !wantGoogle && wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
                return &oidcLoginController{
-                       Cluster:      cluster,
-                       RailsProxy:   railsProxy,
-                       Issuer:       cluster.Login.OpenIDConnect.Issuer,
-                       ClientID:     cluster.Login.OpenIDConnect.ClientID,
-                       ClientSecret: cluster.Login.OpenIDConnect.ClientSecret,
+                       Cluster:            cluster,
+                       RailsProxy:         railsProxy,
+                       Issuer:             cluster.Login.OpenIDConnect.Issuer,
+                       ClientID:           cluster.Login.OpenIDConnect.ClientID,
+                       ClientSecret:       cluster.Login.OpenIDConnect.ClientSecret,
+                       EmailClaim:         cluster.Login.OpenIDConnect.EmailClaim,
+                       EmailVerifiedClaim: cluster.Login.OpenIDConnect.EmailVerifiedClaim,
+                       UsernameClaim:      cluster.Login.OpenIDConnect.UsernameClaim,
                }
        case !wantGoogle && !wantOpenIDConnect && wantSSO && !wantPAM && !wantLDAP:
                return &ssoLoginController{railsProxy}
index f42b8f8beaf1d2721a78c9883c20353eabd0e43b..9274d75d7c9fdc1973cbcad621b306599e571893 100644 (file)
@@ -36,7 +36,10 @@ type oidcLoginController struct {
        Issuer             string // OIDC issuer URL, e.g., "https://accounts.google.com"
        ClientID           string
        ClientSecret       string
-       UseGooglePeopleAPI bool // Use Google People API to look up alternate email addresses
+       UseGooglePeopleAPI bool   // Use Google People API to look up alternate email addresses
+       EmailClaim         string // OpenID claim to use as email address; typically "email"
+       EmailVerifiedClaim string // If non-empty, ensure claim value is true before accepting EmailClaim; typically "email_verified"
+       UsernameClaim      string // If non-empty, use as preferred username
 
        // override Google People API base URL for testing purposes
        // (normally empty, set by google pkg to
@@ -145,28 +148,29 @@ func (ctrl *oidcLoginController) getAuthInfo(ctx context.Context, token *oauth2.
        var ret rpc.UserSessionAuthInfo
        defer ctxlog.FromContext(ctx).WithField("ret", &ret).Debug("getAuthInfo returned")
 
-       var claims struct {
-               Name     string `json:"name"`
-               Email    string `json:"email"`
-               Verified bool   `json:"email_verified"`
-       }
+       var claims map[string]interface{}
        if err := idToken.Claims(&claims); err != nil {
                return nil, fmt.Errorf("error extracting claims from ID token: %s", err)
-       } else if claims.Verified {
+       } else if verified, _ := claims[ctrl.EmailVerifiedClaim].(bool); verified || ctrl.EmailVerifiedClaim == "" {
                // Fall back to this info if the People API call
                // (below) doesn't return a primary && verified email.
-               if names := strings.Fields(strings.TrimSpace(claims.Name)); len(names) > 1 {
+               name, _ := claims["name"].(string)
+               if names := strings.Fields(strings.TrimSpace(name)); len(names) > 1 {
                        ret.FirstName = strings.Join(names[0:len(names)-1], " ")
                        ret.LastName = names[len(names)-1]
                } else {
                        ret.FirstName = names[0]
                }
-               ret.Email = claims.Email
+               ret.Email, _ = claims[ctrl.EmailClaim].(string)
+       }
+
+       if ctrl.UsernameClaim != "" {
+               ret.Username, _ = claims[ctrl.UsernameClaim].(string)
        }
 
        if !ctrl.UseGooglePeopleAPI {
                if ret.Email == "" {
-                       return nil, fmt.Errorf("cannot log in with unverified email address %q", claims.Email)
+                       return nil, fmt.Errorf("cannot log in with unverified email address %q", claims[ctrl.EmailClaim])
                }
                return &ret, nil
        }
@@ -220,9 +224,13 @@ func (ctrl *oidcLoginController) getAuthInfo(ctx context.Context, token *oauth2.
                return nil, errors.New("cannot log in without a verified email address")
        }
        for ae := range altEmails {
-               if ae != ret.Email {
-                       ret.AlternateEmails = append(ret.AlternateEmails, ae)
-                       if i := strings.Index(ae, "@"); i > 0 && strings.ToLower(ae[i+1:]) == strings.ToLower(ctrl.Cluster.Users.PreferDomainForUsername) {
+               if ae == ret.Email {
+                       continue
+               }
+               ret.AlternateEmails = append(ret.AlternateEmails, ae)
+               if ret.Username == "" {
+                       i := strings.Index(ae, "@")
+                       if i > 0 && strings.ToLower(ae[i+1:]) == strings.ToLower(ctrl.Cluster.Users.PreferDomainForUsername) {
                                ret.Username = strings.SplitN(ae[:i], "+", 2)[0]
                        }
                }
index aa437218ff79eaae26ee93d8450af72561387919..1345e86900dd1056da5a9259bf0d5caf179253e5 100644 (file)
@@ -113,6 +113,9 @@ func (s *OIDCLoginSuite) SetUpTest(c *check.C) {
                                "email":          s.authEmail,
                                "email_verified": s.authEmailVerified,
                                "name":           s.authName,
+                               "alt_verified":   true,                    // for custom claim tests
+                               "alt_email":      "alt_email@example.com", // for custom claim tests
+                               "alt_username":   "desired-username",      // for custom claim tests
                        })
                        json.NewEncoder(w).Encode(struct {
                                AccessToken  string `json:"access_token"`
@@ -299,7 +302,7 @@ func (s *OIDCLoginSuite) TestGoogleLogin_PeopleAPIError(c *check.C) {
        c.Check(resp.RedirectLocation, check.Equals, "")
 }
 
-func (s *OIDCLoginSuite) TestOIDCLogin_Success(c *check.C) {
+func (s *OIDCLoginSuite) TestGenericOIDCLogin(c *check.C) {
        s.cluster.Login.Google.Enable = false
        s.cluster.Login.OpenIDConnect.Enable = true
        json.Unmarshal([]byte(fmt.Sprintf("%q", s.fakeIssuer.URL)), &s.cluster.Login.OpenIDConnect.Issuer)
@@ -307,18 +310,91 @@ func (s *OIDCLoginSuite) TestOIDCLogin_Success(c *check.C) {
        s.cluster.Login.OpenIDConnect.ClientSecret = "oidc#client#secret"
        s.validClientID = "oidc#client#id"
        s.validClientSecret = "oidc#client#secret"
-       s.localdb = NewConn(s.cluster)
-       state := s.startLogin(c)
-       resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
-               Code:  s.validCode,
-               State: state,
-       })
-       c.Assert(err, check.IsNil)
-       c.Check(resp.HTML.String(), check.Equals, "")
-       target, err := url.Parse(resp.RedirectLocation)
-       c.Assert(err, check.IsNil)
-       token := target.Query().Get("api_token")
-       c.Check(token, check.Matches, `v2/zzzzz-gj3su-.{15}/.{32,50}`)
+       for _, trial := range []struct {
+               expectEmail string // "" if failure expected
+               setup       func()
+       }{
+               {
+                       expectEmail: "user@oidc.example.com",
+                       setup: func() {
+                               c.Log("=== succeed because email_verified is false but not required")
+                               s.authEmail = "user@oidc.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = ""
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = ""
+                       },
+               },
+               {
+                       expectEmail: "",
+                       setup: func() {
+                               c.Log("=== fail because email_verified is false and required")
+                               s.authEmail = "user@oidc.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = "email_verified"
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = ""
+                       },
+               },
+               {
+                       expectEmail: "user@oidc.example.com",
+                       setup: func() {
+                               c.Log("=== succeed because email_verified is false but config uses custom 'verified' claim")
+                               s.authEmail = "user@oidc.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = "alt_verified"
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = ""
+                       },
+               },
+               {
+                       expectEmail: "alt_email@example.com",
+                       setup: func() {
+                               c.Log("=== succeed with custom 'email' and 'email_verified' claims")
+                               s.authEmail = "bad@wrong.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "alt_email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = "alt_verified"
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = "alt_username"
+                       },
+               },
+       } {
+               trial.setup()
+               if s.railsSpy != nil {
+                       s.railsSpy.Close()
+               }
+               s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+               s.localdb = NewConn(s.cluster)
+               *s.localdb.railsProxy = *rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider)
+
+               state := s.startLogin(c)
+               resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
+                       Code:  s.validCode,
+                       State: state,
+               })
+               c.Assert(err, check.IsNil)
+               if trial.expectEmail == "" {
+                       c.Check(resp.HTML.String(), check.Matches, `(?ms).*Login error.*`)
+                       c.Check(resp.RedirectLocation, check.Equals, "")
+                       continue
+               }
+               c.Check(resp.HTML.String(), check.Equals, "")
+               target, err := url.Parse(resp.RedirectLocation)
+               c.Assert(err, check.IsNil)
+               token := target.Query().Get("api_token")
+               c.Check(token, check.Matches, `v2/zzzzz-gj3su-.{15}/.{32,50}`)
+               authinfo := getCallbackAuthInfo(c, s.railsSpy)
+               c.Check(authinfo.Email, check.Equals, trial.expectEmail)
+
+               switch s.cluster.Login.OpenIDConnect.UsernameClaim {
+               case "alt_username":
+                       c.Check(authinfo.Username, check.Equals, "desired-username")
+               case "":
+                       c.Check(authinfo.Username, check.Equals, "")
+               default:
+                       c.Fail() // bad test case
+               }
+       }
 }
 
 func (s *OIDCLoginSuite) TestGoogleLogin_Success(c *check.C) {
similarity index 62%
rename from lib/undelete/cmd.go
rename to lib/recovercollection/cmd.go
index 09adfae3c6ca8002fae8477666434038031b3e32..cea4607c98fe533fec8b37f839f1f641ce3fcccb 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package undelete
+package recovercollection
 
 import (
        "context"
@@ -42,7 +42,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
        flags.SetOutput(stderr)
        flags.Usage = func() {
                fmt.Fprintf(flags.Output(), `Usage:
-       %s [options ...] /path/to/manifest.txt [...]
+       %s [options ...] { /path/to/manifest.txt | log-or-collection-uuid } [...]
 
        This program recovers deleted collections. Recovery is
        possible when the collection's manifest is still available and
@@ -52,10 +52,25 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
        collections, or the blocks have been trashed but not yet
        deleted).
 
+       There are multiple ways to specify a collection to recover:
+
+        * Path to a local file containing a manifest with the desired
+         data
+
+       * UUID of an Arvados log entry, typically a "delete" or
+         "update" event, whose "old attributes" have a manifest with
+         the desired data
+
+       * UUID of an Arvados collection whose most recent log entry,
+          typically a "delete" or "update" event, has the desired
+          data in its "old attributes"
+
        For each provided collection manifest, once all data blocks
        are recovered/protected from garbage collection, a new
        collection is saved and its UUID is printed on stdout.
 
+       Restored collections will belong to the system (root) user.
+
        Exit status will be zero if recovery is successful, i.e., a
        collection is saved for each provided manifest.
 Options:
@@ -96,7 +111,7 @@ Options:
                return 1
        }
        client.AuthToken = cluster.SystemRootToken
-       und := undeleter{
+       rcvr := recoverer{
                client:  client,
                cluster: cluster,
                logger:  logger,
@@ -105,31 +120,81 @@ Options:
        exitcode := 0
        for _, src := range flags.Args() {
                logger := logger.WithField("src", src)
-               if len(src) == 27 && src[5:12] == "-57u5n-" {
-                       logger.Error("log entry lookup not implemented")
-                       exitcode = 1
-                       continue
-               } else {
-                       mtxt, err := ioutil.ReadFile(src)
+               var mtxt string
+               if !strings.Contains(src, "/") && len(src) == 27 && src[5] == '-' && src[11] == '-' {
+                       var filters []arvados.Filter
+                       if src[5:12] == "-57u5n-" {
+                               filters = []arvados.Filter{{"uuid", "=", src}}
+                       } else if src[5:12] == "-4zz18-" {
+                               filters = []arvados.Filter{{"object_uuid", "=", src}}
+                       } else {
+                               logger.Error("looks like a UUID but not a log or collection UUID (if it's really a file, prepend './')")
+                               exitcode = 1
+                               continue
+                       }
+                       var resp struct {
+                               Items []struct {
+                                       UUID       string    `json:"uuid"`
+                                       EventType  string    `json:"event_type"`
+                                       EventAt    time.Time `json:"event_at"`
+                                       ObjectUUID string    `json:"object_uuid"`
+                                       Properties struct {
+                                               OldAttributes struct {
+                                                       ManifestText string `json:"manifest_text"`
+                                               } `json:"old_attributes"`
+                                       } `json:"properties"`
+                               }
+                       }
+                       err = client.RequestAndDecode(&resp, "GET", "arvados/v1/logs", nil, arvados.ListOptions{
+                               Limit:   1,
+                               Order:   []string{"event_at desc"},
+                               Filters: filters,
+                       })
                        if err != nil {
-                               logger.WithError(err).Error("error loading manifest data")
+                               logger.WithError(err).Error("error looking up log entry")
+                               exitcode = 1
+                               continue
+                       } else if len(resp.Items) == 0 {
+                               logger.Error("log entry not found")
+                               exitcode = 1
+                               continue
+                       }
+                       logent := resp.Items[0]
+                       logger.WithFields(logrus.Fields{
+                               "uuid":                logent.UUID,
+                               "old_collection_uuid": logent.ObjectUUID,
+                               "logged_event_type":   logent.EventType,
+                               "logged_event_time":   logent.EventAt,
+                               "logged_object_uuid":  logent.ObjectUUID,
+                       }).Info("loaded log entry")
+                       mtxt = logent.Properties.OldAttributes.ManifestText
+                       if mtxt == "" {
+                               logger.Error("log entry properties.old_attributes.manifest_text missing or empty")
                                exitcode = 1
                                continue
                        }
-                       uuid, err := und.RecoverManifest(string(mtxt))
+               } else {
+                       buf, err := ioutil.ReadFile(src)
                        if err != nil {
-                               logger.WithError(err).Error("recovery failed")
+                               logger.WithError(err).Error("failed to load manifest data from file")
                                exitcode = 1
                                continue
                        }
-                       logger.WithField("UUID", uuid).Info("recovery succeeded")
-                       fmt.Fprintln(stdout, uuid)
+                       mtxt = string(buf)
+               }
+               uuid, err := rcvr.RecoverManifest(string(mtxt))
+               if err != nil {
+                       logger.WithError(err).Error("recovery failed")
+                       exitcode = 1
+                       continue
                }
+               logger.WithField("UUID", uuid).Info("recovery succeeded")
+               fmt.Fprintln(stdout, uuid)
        }
        return exitcode
 }
 
-type undeleter struct {
+type recoverer struct {
        client  *arvados.Client
        cluster *arvados.Cluster
        logger  logrus.FieldLogger
@@ -139,8 +204,8 @@ var errNotFound = errors.New("not found")
 
 // Finds the timestamp of the newest copy of blk on svc. Returns
 // errNotFound if blk is not on svc at all.
-func (und undeleter) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
-       found, err := svc.Index(und.client, blk)
+func (rcvr recoverer) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
+       found, err := svc.Index(rcvr.client, blk)
        if err != nil {
                logger.WithError(err).Warn("error getting index")
                return time.Time{}, err
@@ -170,17 +235,17 @@ var errTouchIneffective = errors.New("(BUG?) touch succeeded but had no effect -
 // decide to trash it, all before our recovered collection gets
 // saved. But if the block's timestamp is more recent than blobsigttl,
 // keepstore will refuse to trash it even if told to by keep-balance.
-func (und undeleter) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
-       if latest, err := und.newestMtime(logger, blk, svc); err != nil {
+func (rcvr recoverer) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
+       if latest, err := rcvr.newestMtime(logger, blk, svc); err != nil {
                return err
        } else if latest.Add(blobsigttl).After(blobsigexp) {
                return nil
        }
-       if err := svc.Touch(ctx, und.client, blk); err != nil {
+       if err := svc.Touch(ctx, rcvr.client, blk); err != nil {
                return fmt.Errorf("error updating timestamp: %s", err)
        }
        logger.Debug("updated timestamp")
-       if latest, err := und.newestMtime(logger, blk, svc); err == errNotFound {
+       if latest, err := rcvr.newestMtime(logger, blk, svc); err == errNotFound {
                return fmt.Errorf("(BUG?) touch succeeded, but then block did not appear in index")
        } else if err != nil {
                return err
@@ -194,7 +259,7 @@ func (und undeleter) ensureSafe(ctx context.Context, logger logrus.FieldLogger,
 // Untrash and update GC timestamps (as needed) on blocks referenced
 // by the given manifest, save a new collection and return the new
 // collection's UUID.
-func (und undeleter) RecoverManifest(mtxt string) (string, error) {
+func (rcvr recoverer) RecoverManifest(mtxt string) (string, error) {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
 
@@ -210,9 +275,9 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        go close(todo)
 
        var services []arvados.KeepService
-       err = und.client.EachKeepService(func(svc arvados.KeepService) error {
+       err = rcvr.client.EachKeepService(func(svc arvados.KeepService) error {
                if svc.ServiceType == "proxy" {
-                       und.logger.WithField("service", svc).Debug("ignore proxy service")
+                       rcvr.logger.WithField("service", svc).Debug("ignore proxy service")
                } else {
                        services = append(services, svc)
                }
@@ -221,7 +286,7 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        if err != nil {
                return "", fmt.Errorf("error getting list of keep services: %s", err)
        }
-       und.logger.WithField("services", services).Debug("got list of services")
+       rcvr.logger.WithField("services", services).Debug("got list of services")
 
        // blobsigexp is our deadline for saving the rescued
        // collection. This must be less than BlobSigningTTL
@@ -235,9 +300,9 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        // would have lived long enough anyway if left alone.
        // BlobSigningTTL/2 (typically around 1 week) is much longer
       // than we need to recover even a very large collection.
-       blobsigttl := und.cluster.Collections.BlobSigningTTL.Duration()
+       blobsigttl := rcvr.cluster.Collections.BlobSigningTTL.Duration()
        blobsigexp := time.Now().Add(blobsigttl / 2)
-       und.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
+       rcvr.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
 
        // We'll start a number of threads, each working on
        // checking/recovering one block at a time. The threads
@@ -255,18 +320,18 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
                nextblk:
                        for idx := range todo {
                                blk := strings.SplitN(string(blks[idx]), "+", 2)[0]
-                               logger := und.logger.WithField("block", blk)
+                               logger := rcvr.logger.WithField("block", blk)
                                for _, untrashing := range []bool{false, true} {
                                        for _, svc := range services {
                                                logger := logger.WithField("service", fmt.Sprintf("%s:%d", svc.ServiceHost, svc.ServicePort))
                                                if untrashing {
-                                                       if err := svc.Untrash(ctx, und.client, blk); err != nil {
+                                                       if err := svc.Untrash(ctx, rcvr.client, blk); err != nil {
                                                                logger.WithError(err).Debug("untrash failed")
                                                                continue
                                                        }
                                                        logger.Info("untrashed")
                                                }
-                                               err := und.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
+                                               err := rcvr.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
                                                if err == errNotFound {
                                                        logger.Debug(err)
                                                } else if err != nil {
@@ -293,17 +358,17 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        }
        if havenot > 0 {
                if have > 0 {
-                       und.logger.Warn("partial recovery is not implemented")
+                       rcvr.logger.Warn("partial recovery is not implemented")
                }
                return "", fmt.Errorf("unable to recover %d of %d blocks", havenot, have+havenot)
        }
 
-       if und.cluster.Collections.BlobSigning {
-               key := []byte(und.cluster.Collections.BlobSigningKey)
-               coll.ManifestText = arvados.SignManifest(coll.ManifestText, und.client.AuthToken, blobsigexp, blobsigttl, key)
+       if rcvr.cluster.Collections.BlobSigning {
+               key := []byte(rcvr.cluster.Collections.BlobSigningKey)
+               coll.ManifestText = arvados.SignManifest(coll.ManifestText, rcvr.client.AuthToken, blobsigexp, blobsigttl, key)
        }
-       und.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
-       err = und.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
+       rcvr.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
+       err = rcvr.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
                "collection": map[string]interface{}{
                        "manifest_text": coll.ManifestText,
                },
@@ -311,6 +376,6 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        if err != nil {
                return "", fmt.Errorf("error saving new collection: %s", err)
        }
-       und.logger.WithField("UUID", coll.UUID).Debug("created new collection")
+       rcvr.logger.WithField("UUID", coll.UUID).Debug("created new collection")
        return coll.UUID, nil
 }
similarity index 76%
rename from lib/undelete/cmd_test.go
rename to lib/recovercollection/cmd_test.go
index a5edaf90b3190d305b09a7f019e34b92992ab609..57c2c64cdab01289911043c5767ed92edbcd8c36 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package undelete
+package recovercollection
 
 import (
        "bytes"
@@ -36,7 +36,7 @@ func (*Suite) TestUnrecoverableBlock(c *check.C) {
        mfile := tmp + "/manifest"
        ioutil.WriteFile(mfile, []byte(". aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+410 0:410:Gone\n"), 0777)
        var stdout, stderr bytes.Buffer
-       exitcode := Command.RunCommand("undelete.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
+       exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
        c.Check(exitcode, check.Equals, 1)
        c.Check(stdout.String(), check.Equals, "")
        c.Log(stderr.String())
@@ -93,7 +93,7 @@ func (*Suite) TestUntrashAndTouchBlock(c *check.C) {
        }
 
        var stdout, stderr bytes.Buffer
-       exitcode := Command.RunCommand("undelete.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
+       exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
        c.Check(exitcode, check.Equals, 0)
        c.Check(stdout.String(), check.Matches, `zzzzz-4zz18-.{15}\n`)
        c.Log(stderr.String())
@@ -115,3 +115,22 @@ func (*Suite) TestUntrashAndTouchBlock(c *check.C) {
        }
        c.Check(found, check.Equals, true)
 }
+
+func (*Suite) TestUnusableManifestSourceArg(c *check.C) {
+       for _, trial := range []struct {
+               srcArg    string
+               errRegexp string
+       }{
+               {"zzzzz-4zz18-aaaaaaaaaaaaaaa", `(?ms).*msg="log entry not found".*`},
+               {"zzzzz-57u5n-aaaaaaaaaaaaaaa", `(?ms).*msg="log entry not found.*`},
+               {"zzzzz-57u5n-containerlog006", `(?ms).*msg="log entry properties\.old_attributes\.manifest_text missing or empty".*`},
+               {"zzzzz-j7d0g-aaaaaaaaaaaaaaa", `(?ms).*msg="looks like a UUID but not a log or collection UUID.*`},
+       } {
+               var stdout, stderr bytes.Buffer
+               exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", trial.srcArg}, &bytes.Buffer{}, &stdout, &stderr)
+               c.Check(exitcode, check.Equals, 1)
+               c.Check(stdout.String(), check.Equals, "")
+               c.Log(stderr.String())
+               c.Check(stderr.String(), check.Matches, trial.errRegexp)
+       }
+}
index dbd9f71099619203bb38f4dd1118b865f5c2f662..029e223218b2a5136b8eac2238b088e2ce4fb983 100644 (file)
@@ -157,10 +157,13 @@ type Cluster struct {
                        AlternateEmailAddresses bool
                }
                OpenIDConnect struct {
-                       Enable       bool
-                       Issuer       string
-                       ClientID     string
-                       ClientSecret string
+                       Enable             bool
+                       Issuer             string
+                       ClientID           string
+                       ClientSecret       string
+                       EmailClaim         string
+                       EmailVerifiedClaim string
+                       UsernameClaim      string
                }
                PAM struct {
                        Enable             bool
index d6045a5dcbf35a3c786bb6db5105d49e9636cc39..5c4cf7bc16c22ad8d8780714d9b0165cf2c4043b 100644 (file)
@@ -75,9 +75,10 @@ class DatabaseController < ApplicationController
       raise
     end
 
-    require 'refresh_permission_view'
+    require 'update_permissions'
 
-    refresh_permission_view
+    refresh_permissions
+    refresh_trashed
 
     # Done.
     send_json success: true
index 816dbf4758dd0baa6e4ca438434b0b770fd1c0b7..8afebfb79eab56e24e83394f3923469d28faba94 100644 (file)
@@ -285,10 +285,13 @@ class ArvadosModel < ApplicationRecord
     sql_conds = nil
     user_uuids = users_list.map { |u| u.uuid }
 
+    # For details on how the trashed_groups table is constructed, see
+    # db/migrate/20200501150153_permission_table.rb
+
     exclude_trashed_records = ""
     if !include_trash and (sql_table == "groups" or sql_table == "collections") then
-      # Only include records that are not explicitly trashed
-      exclude_trashed_records = "AND #{sql_table}.is_trashed = false"
+      # Only include records that are not trashed
+      exclude_trashed_records = "AND (#{sql_table}.trash_at is NULL or #{sql_table}.trash_at > statement_timestamp())"
     end
 
     if users_list.select { |u| u.is_admin }.any?
@@ -296,16 +299,28 @@ class ArvadosModel < ApplicationRecord
       if !include_trash
         if sql_table != "api_client_authorizations"
           # Only include records where the owner is not trashed
-          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-                      "WHERE trashed = 1) #{exclude_trashed_records}"
+          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} "+
+                      "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
         end
       end
     else
       trashed_check = ""
       if !include_trash then
-        trashed_check = "AND trashed = 0"
+        trashed_check = "AND target_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} where trash_at <= statement_timestamp())"
       end
 
+      # The core of the permission check is a join against the
+      # materialized_permissions table to determine if the user has at
+      # least read permission to either the object itself or its
+      # direct owner (if traverse_owned is true).  See
+      # db/migrate/20200501150153_permission_table.rb for details on
+      # how the permissions are computed.
+
+      # A user can have can_manage access to another user; this grants
+      # full access to all that user's stuff.  To implement that, we
+      # need to include those other users in the permission query.
+      user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: ":user_uuids", perm_level: 1}
+
       # Note: it is possible to combine the direct_check and
       # owner_check into a single EXISTS() clause, however it turns
       # out the query optimizer doesn't like it and forces a sequential
@@ -316,13 +331,28 @@ class ArvadosModel < ApplicationRecord
 
       # Match a direct read permission link from the user to the record uuid
       direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check})"
+                     "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check})"
 
-      # Match a read permission link from the user to the record's owner_uuid
+      # Match a read permission for the user to the record's
+      # owner_uuid.  This is so we can have a permissions table that
+      # mostly consists of users and groups (projects are a type of
+      # group) and not have to compute and list user permission to
+      # every single object in the system.
+      #
+      # Don't do this for API keys (special behavior) or groups
+      # (already covered by direct_check).
+      #
+      # The traverse_owned flag indicates whether the permission to
+      # read an object also implies transitive permission to read
+      # things the object owns.  The situation where this is important
+      # is determining whether we can read an object owned by another
+      # user.  This makes it possible to have permission to read the
+      # user.  This makes it possible to have permission to read the
+      # user record without granting permission to read things the
+      # other user owns.
       owner_check = ""
       if sql_table != "api_client_authorizations" and sql_table != "groups" then
         owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-          "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_owner_uuid IS NOT NULL) "
+          "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
       end
 
       links_cond = ""
@@ -331,7 +361,7 @@ class ArvadosModel < ApplicationRecord
         # users some permission _or_ gives anyone else permission to
         # view one of the authorized users.
         links_cond = "OR (#{sql_table}.link_class IN (:permission_link_classes) AND "+
-                       "(#{sql_table}.head_uuid IN (:user_uuids) OR #{sql_table}.tail_uuid IN (:user_uuids)))"
+                       "(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
       end
 
       sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
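
For a non-admin user listing, say, collections with include_trash off, the fragments above assemble into a condition roughly like the following (a sketch only; it assumes PERMISSION_VIEW and TRASHED_GROUPS expand to the new materialized_permissions and trashed_groups tables, and abbreviates the user-UUID subquery as <user uuids>):

<pre>
-- direct_check: the user has at least read permission on the record itself
( collections.uuid IN (
    SELECT target_uuid FROM materialized_permissions
     WHERE user_uuid IN (<user uuids>) AND perm_level >= 1
       AND target_uuid NOT IN (SELECT group_uuid FROM trashed_groups
                               WHERE trash_at <= statement_timestamp()))
-- owner_check: or at least read permission on the record's owner_uuid,
-- provided that permission traverses ownership (traverse_owned)
  OR collections.owner_uuid IN (
    SELECT target_uuid FROM materialized_permissions
     WHERE user_uuid IN (<user uuids>) AND perm_level >= 1
       AND target_uuid NOT IN (SELECT group_uuid FROM trashed_groups
                               WHERE trash_at <= statement_timestamp())
       AND traverse_owned) )
-- exclude_trashed_records: the record itself is not trashed
AND (collections.trash_at IS NULL OR collections.trash_at > statement_timestamp())
</pre>

The links_cond branch applies only when listing the links table and is omitted from this sketch.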
index 6e7ab9b07677421ca76dcd6ddb57bc98ff8cb7a8..39f491503ee583033f80ae72ef982261c6ba0af9 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+require 'update_permissions'
+
 class DatabaseSeeds
   extend CurrentApiClient
   def self.install
@@ -12,5 +14,7 @@ class DatabaseSeeds
     anonymous_group_read_permission
     anonymous_user
     empty_collection
+    refresh_permissions
+    refresh_trashed
   end
 end
index 1f2b0d8b776a1f63ca94d0e3e7654e7f9cd5887b..36814a3163e074433dfb3f5beeffb77510740475 100644 (file)
@@ -17,9 +17,15 @@ class Group < ArvadosModel
   attribute :properties, :jsonbHash, default: {}
 
   validate :ensure_filesystem_compatible_name
-  after_create :invalidate_permissions_cache
-  after_update :maybe_invalidate_permissions_cache
   before_create :assign_name
+  after_create :after_ownership_change
+  after_create :update_trash
+
+  before_update :before_ownership_change
+  after_update :after_ownership_change
+
+  after_update :update_trash
+  before_destroy :clear_permissions_and_trash
 
   api_accessible :user, extend: :common do |t|
     t.add :name
@@ -38,18 +44,58 @@ class Group < ArvadosModel
     super if group_class == 'project'
   end
 
-  def maybe_invalidate_permissions_cache
-    if uuid_changed? or owner_uuid_changed? or is_trashed_changed?
-      # This can change users' permissions on other groups as well as
-      # this one.
-      invalidate_permissions_cache
+  def update_trash
+    if trash_at_changed? or owner_uuid_changed?
+      # The group was added or removed from the trash.
+      #
+      # Strategy:
+      #   Compute project subtree, propagating trash_at to subprojects
+      #   Remove groups that don't belong from trash
+      #   Add/update groups that do belong in the trash
+
+      temptable = "group_subtree_#{rand(2**64).to_s(10)}"
+      ActiveRecord::Base.connection.exec_query %{
+create temporary table #{temptable} on commit drop
+as select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)
+},
+                                               'Group.update_trash.select',
+                                               [[nil, self.uuid],
+                                                [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
+                                                [nil, self.trash_at]]
+
+      ActiveRecord::Base.connection.exec_delete %{
+delete from trashed_groups where group_uuid in (select target_uuid from #{temptable} where trash_at is NULL);
+},
+                                            "Group.update_trash.delete"
+
+      ActiveRecord::Base.connection.exec_query %{
+insert into trashed_groups (group_uuid, trash_at)
+  select target_uuid as group_uuid, trash_at from #{temptable} where trash_at is not NULL
+on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at;
+},
+                                            "Group.update_trash.insert"
+    end
+  end
+
+  def before_ownership_change
+    if owner_uuid_changed? and !self.owner_uuid_was.nil?
+      MaterializedPermission.where(user_uuid: owner_uuid_was, target_uuid: uuid).delete_all
+      update_permissions self.owner_uuid_was, self.uuid, REVOKE_PERM
+    end
+  end
+
+  def after_ownership_change
+    if owner_uuid_changed?
+      update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
     end
   end
 
-  def invalidate_permissions_cache
-    # Ensure a new group can be accessed by the appropriate users
-    # immediately after being created.
-    User.invalidate_permissions_cache self.async_permissions_update
+  def clear_permissions_and_trash
+    MaterializedPermission.where(target_uuid: uuid).delete_all
+    ActiveRecord::Base.connection.exec_delete %{
+delete from trashed_groups where group_uuid=$1
+}, "Group.clear_permissions_and_trash", [[nil, self.uuid]]
+
   end
 
   def assign_name
index ad7800fe679cb91936bde76f00566873cb369419..21d89767c7139a0c8b7ae8eb67595d6ebe336110 100644 (file)
@@ -11,12 +11,13 @@ class Link < ArvadosModel
   # already know how to properly treat them.
   attribute :properties, :jsonbHash, default: {}
 
+  validate :name_links_are_obsolete
   before_create :permission_to_attach_to_objects
   before_update :permission_to_attach_to_objects
-  after_update :maybe_invalidate_permissions_cache
-  after_create :maybe_invalidate_permissions_cache
-  after_destroy :maybe_invalidate_permissions_cache
-  validate :name_links_are_obsolete
+  after_update :call_update_permissions
+  after_create :call_update_permissions
+  before_destroy :clear_permissions
+  after_destroy :check_permissions
 
   api_accessible :user, extend: :common do |t|
     t.add :tail_uuid
@@ -64,15 +65,28 @@ class Link < ArvadosModel
     false
   end
 
-  def maybe_invalidate_permissions_cache
+  PERM_LEVEL = {
+    'can_read' => 1,
+    'can_login' => 1,
+    'can_write' => 2,
+    'can_manage' => 3,
+  }
+
+  def call_update_permissions
+    if self.link_class == 'permission'
+      update_permissions tail_uuid, head_uuid, PERM_LEVEL[name], self.uuid
+    end
+  end
+
+  def clear_permissions
+    if self.link_class == 'permission'
+      update_permissions tail_uuid, head_uuid, REVOKE_PERM, self.uuid
+    end
+  end
+
+  def check_permissions
     if self.link_class == 'permission'
-      # Clearing the entire permissions cache can generate many
-      # unnecessary queries if many active users are not affected by
-      # this change. In such cases it would be better to search cached
-      # permissions for head_uuid and tail_uuid, and invalidate the
-      # cache for only those users. (This would require a browseable
-      # cache.)
-      User.invalidate_permissions_cache
+      check_permissions_against_full_refresh
     end
   end
 
diff --git a/services/api/app/models/materialized_permission.rb b/services/api/app/models/materialized_permission.rb
new file mode 100644 (file)
index 0000000..24ba673
--- /dev/null
@@ -0,0 +1,6 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class MaterializedPermission < ApplicationRecord
+end
diff --git a/services/api/app/models/trashed_group.rb b/services/api/app/models/trashed_group.rb
new file mode 100644 (file)
index 0000000..5c85946
--- /dev/null
@@ -0,0 +1,6 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class TrashedGroup < ApplicationRecord
+end
index c3641b64e84f04217145edacab05ac8d84f259a7..64facaa98e84c2eacfdc6fed38372f2dff22fdde 100644 (file)
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 require 'can_be_an_owner'
-require 'refresh_permission_view'
 
 class User < ArvadosModel
   include HasUuid
@@ -28,25 +27,31 @@ class User < ArvadosModel
     user.username.nil? and user.username_changed?
   }
   before_update :setup_on_activate
+
   before_create :check_auto_admin
   before_create :set_initial_username, :if => Proc.new { |user|
     user.username.nil? and user.email
   }
+  after_create :after_ownership_change
   after_create :setup_on_activate
   after_create :add_system_group_permission_link
-  after_create :invalidate_permissions_cache
   after_create :auto_setup_new_user, :if => Proc.new { |user|
     Rails.configuration.Users.AutoSetupNewUsers and
     (user.uuid != system_user_uuid) and
     (user.uuid != anonymous_user_uuid)
   }
   after_create :send_admin_notifications
+
+  before_update :before_ownership_change
+  after_update :after_ownership_change
   after_update :send_profile_created_notification
   after_update :sync_repository_names, :if => Proc.new { |user|
     (user.uuid != system_user_uuid) and
     user.username_changed? and
     (not user.username_was.nil?)
   }
+  before_destroy :clear_permissions
+  after_destroy :remove_self_from_permissions
 
   has_many :authorized_keys, :foreign_key => :authorized_user_uuid, :primary_key => :uuid
   has_many :repositories, foreign_key: :owner_uuid, primary_key: :uuid
@@ -77,6 +82,12 @@ class User < ArvadosModel
      {read: true, write: true},
      {read: true, write: true, manage: true}]
 
+  VAL_FOR_PERM =
+    {:read => 1,
+     :write => 2,
+     :manage => 3}
+
+
   def full_name
     "#{first_name} #{last_name}".strip
   end
@@ -88,7 +99,7 @@ class User < ArvadosModel
   end
 
   def groups_i_can(verb)
-    my_groups = self.group_permissions.select { |uuid, mask| mask[verb] }.keys
+    my_groups = self.group_permissions(VAL_FOR_PERM[verb]).keys
     if verb == :read
       my_groups << anonymous_group_uuid
     end
@@ -107,60 +118,68 @@ class User < ArvadosModel
         end
       end
       next if target_uuid == self.uuid
-      next if (group_permissions[target_uuid] and
-               group_permissions[target_uuid][action])
-      if target.respond_to? :owner_uuid
-        next if target.owner_uuid == self.uuid
-        next if (group_permissions[target.owner_uuid] and
-                 group_permissions[target.owner_uuid][action])
-      end
-      sufficient_perms = case action
-                         when :manage
-                           ['can_manage']
-                         when :write
-                           ['can_manage', 'can_write']
-                         when :read
-                           ['can_manage', 'can_write', 'can_read']
-                         else
-                           # (Skip this kind of permission opportunity
-                           # if action is an unknown permission type)
-                         end
-      if sufficient_perms
-        # Check permission links with head_uuid pointing directly at
-        # the target object. If target is a Group, this is redundant
-        # and will fail except [a] if permission caching is broken or
-        # [b] during a race condition, where a permission link has
-        # *just* been added.
-        if Link.where(link_class: 'permission',
-                      name: sufficient_perms,
-                      tail_uuid: groups_i_can(action) + [self.uuid],
-                      head_uuid: target_uuid).any?
-          next
-        end
+
+      target_owner_uuid = target.owner_uuid if target.respond_to? :owner_uuid
+
+      user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$3"}
+
+      unless ActiveRecord::Base.connection.
+        exec_query(%{
+SELECT 1 FROM #{PERMISSION_VIEW}
+  WHERE user_uuid in (#{user_uuids_subquery}) and
+        ((target_uuid = $2 and perm_level >= $3)
+         or (target_uuid = $4 and perm_level >= $3 and traverse_owned))
+},
+                  # "name" arg is a query label that appears in logs:
+                   "user_can_query",
+                   [[nil, self.uuid],
+                    [nil, target_uuid],
+                    [nil, VAL_FOR_PERM[action]],
+                    [nil, target_owner_uuid]]
+                  ).any?
+        return false
       end
-      return false
     end
     true
   end
 
-  def self.invalidate_permissions_cache(async=false)
-    refresh_permission_view(async)
+  def before_ownership_change
+    if owner_uuid_changed? and !self.owner_uuid_was.nil?
+      MaterializedPermission.where(user_uuid: owner_uuid_was, target_uuid: uuid).delete_all
+      update_permissions self.owner_uuid_was, self.uuid, REVOKE_PERM
+    end
+  end
+
+  def after_ownership_change
+    if owner_uuid_changed?
+      update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
+    end
+  end
+
+  def clear_permissions
+    MaterializedPermission.where("user_uuid = ? and target_uuid != ?", uuid, uuid).delete_all
   end
 
-  def invalidate_permissions_cache
-    User.invalidate_permissions_cache
+  def remove_self_from_permissions
+    MaterializedPermission.where("target_uuid = ?", uuid).delete_all
+    check_permissions_against_full_refresh
   end
 
   # Return a hash of {user_uuid: group_perms}
+  #
+  # note: this does not account for permissions that a user gains by
+  # having can_manage on another user.
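+  #
+  # Illustrative shape of the returned hash (made-up uuids); each
+  # inner value is a PERMS_FOR_VAL entry:
+  #   {"zzzzz-tpzed-xxxxxxxxxxxxxxx" =>
+  #      {"zzzzz-j7d0g-xxxxxxxxxxxxxxx" => {read: true, write: true}}}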
   def self.all_group_permissions
     all_perms = {}
     ActiveRecord::Base.connection.
-      exec_query("SELECT user_uuid, target_owner_uuid, perm_level, trashed
+      exec_query(%{
+SELECT user_uuid, target_uuid, perm_level
                   FROM #{PERMISSION_VIEW}
-                  WHERE target_owner_uuid IS NOT NULL",
+                  WHERE traverse_owned
+},
                   # "name" arg is a query label that appears in logs:
-                  "all_group_permissions",
-                  ).rows.each do |user_uuid, group_uuid, max_p_val, trashed|
+                 "all_group_permissions").
+      rows.each do |user_uuid, group_uuid, max_p_val|
       all_perms[user_uuid] ||= {}
       all_perms[user_uuid][group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
     end
@@ -170,18 +189,23 @@ class User < ArvadosModel
   # Return a hash of {group_uuid: perm_hash} where perm_hash[:read]
   # and perm_hash[:write] are true if this user can read and write
   # objects owned by group_uuid.
-  def group_permissions
-    group_perms = {self.uuid => {:read => true, :write => true, :manage => true}}
+  def group_permissions(level=1)
+    group_perms = {}
+
+    user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$2"}
+
     ActiveRecord::Base.connection.
-      exec_query("SELECT target_owner_uuid, perm_level, trashed
-                  FROM #{PERMISSION_VIEW}
-                  WHERE user_uuid = $1
-                  AND target_owner_uuid IS NOT NULL",
+      exec_query(%{
+SELECT target_uuid, perm_level
+  FROM #{PERMISSION_VIEW}
+  WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= $2
+},
                   # "name" arg is a query label that appears in logs:
-                  "group_permissions for #{uuid}",
+                  "User.group_permissions",
                   # "binds" arg is an array of [col_id, value] for '$1' vars:
-                  [[nil, uuid]],
-                ).rows.each do |group_uuid, max_p_val, trashed|
+                  [[nil, uuid],
+                   [nil, level]]).
+      rows.each do |group_uuid, max_p_val|
       group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
     end
     group_perms
@@ -309,6 +333,18 @@ class User < ArvadosModel
       self.uuid = new_uuid
       save!(validate: false)
       change_all_uuid_refs(old_uuid: old_uuid, new_uuid: new_uuid)
+      ActiveRecord::Base.connection.exec_update %{
+update #{PERMISSION_VIEW} set user_uuid=$1 where user_uuid = $2
+},
+                                                'User.update_uuid.update_permissions_user_uuid',
+                                                [[nil, new_uuid],
+                                                 [nil, old_uuid]]
+      ActiveRecord::Base.connection.exec_update %{
+update #{PERMISSION_VIEW} set target_uuid=$1 where target_uuid = $2
+},
+                                                'User.update_uuid.update_permissions_target_uuid',
+                                                [[nil, new_uuid],
+                                                 [nil, old_uuid]]
     end
   end
 
@@ -334,6 +370,9 @@ class User < ArvadosModel
       raise "user does not exist" if !new_user
       raise "cannot merge to an already merged user" if new_user.redirect_to_user_uuid
 
+      self.clear_permissions
+      new_user.clear_permissions
+
       # If 'self' is a remote user, don't transfer authorizations
       # (i.e. ability to access the account) to the new user, because
       # that gives the remote site the ability to access the 'new'
@@ -408,7 +447,12 @@ class User < ArvadosModel
       if redirect_to_new_user
         update_attributes!(redirect_to_user_uuid: new_user.uuid, username: nil)
       end
-      invalidate_permissions_cache
+      skip_check_permissions_against_full_refresh do
+        update_permissions self.uuid, self.uuid, CAN_MANAGE_PERM
+        update_permissions new_user.uuid, new_user.uuid, CAN_MANAGE_PERM
+        update_permissions new_user.owner_uuid, new_user.uuid, CAN_MANAGE_PERM
+      end
+      update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
     end
   end
 
diff --git a/services/api/db/migrate/20200501150153_permission_table.rb b/services/api/db/migrate/20200501150153_permission_table.rb
new file mode 100644 (file)
index 0000000..4f9ea15
--- /dev/null
@@ -0,0 +1,362 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+class PermissionTable < ActiveRecord::Migration[5.0]
+  def up
+    # This is a major migration.  We are replacing the
+    # materialized_permission_view, which is fully recomputed any time
+    # a permission changes (and becomes very expensive as the number
+    # of users/groups becomes large), with a new strategy that only
+    # recomputes permissions for the subset of objects that are
+    # potentially affected by the addition or removal of a permission
+    # relationship (i.e. ownership or a permission link).
+    #
+    # This also disentangles the concept of "trashed groups" from the
+    # permissions system.  Updating trashed items follows a similar
+    # (but less complicated) strategy to updating permissions, so it
+    # may be helpful to look at that first.
+
+    ActiveRecord::Base.connection.execute "DROP MATERIALIZED VIEW IF EXISTS materialized_permission_view;"
+    drop_table :permission_refresh_lock
+
+    # This table stores the set of trashed groups and their trash_at
+    # time.  Used to exclude trashed projects and their contents when
+    # getting object listings.
+    create_table :trashed_groups, :id => false do |t|
+      t.string :group_uuid
+      t.datetime :trash_at
+    end
+    add_index :trashed_groups, :group_uuid, :unique => true
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function project_subtree_with_trash_at (starting_uuid varchar(27), starting_trash_at timestamp)
+returns table (target_uuid varchar(27), trash_at timestamp)
+STABLE
+language SQL
+as $$
+/* Starting from a project, recursively traverse all the projects
+  underneath it and return a set of project uuids and trash_at times
+  (may be null).  The initial trash_at can be a timestamp or null.
+  The trash_at time propagates downward to groups it owns, i.e. when a
+  group is trashed, everything underneath it in the ownership
+  hierarchy is also considered trashed.  However, this fact is
+  recorded in the trashed_groups table, not by updating the trash_at
+  field in the groups table.
+*/
+WITH RECURSIVE
+        project_subtree(uuid, trash_at) as (
+        values (starting_uuid, starting_trash_at)
+        union
+        select groups.uuid, LEAST(project_subtree.trash_at, groups.trash_at)
+          from groups join project_subtree on (groups.owner_uuid = project_subtree.uuid)
+        )
+        select uuid, trash_at from project_subtree;
+$$;
+}
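+
+    # Illustrative sketch (made-up uuid, not executed here): for a
+    # project that is not itself trashed but owns a trashed
+    # subproject,
+    #   select * from project_subtree_with_trash_at('zzzzz-j7d0g-xxxxxxxxxxxxxxx', null)
+    # returns every project in the subtree, with a non-null trash_at
+    # only on the trashed subproject and the projects below it.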
+
+    # Now populate the table.  For a non-test database this is the only
+    # time this ever happens; after this the trash table is updated
+    # incrementally.  See app/models/group.rb#update_trash
+    refresh_trashed
+
+    # The table to store the flattened permissions.  This is almost
+    # exactly the same as the old materialized_permission_view except
+    # that the target_owner_uuid column in the view is now just a
+    # boolean traverse_owned (the column was only ever tested for null
+    # or non-null).
+    #
+    # For details on how this table is used to apply permissions to
+    # queries, see app/models/arvados_model.rb#readable_by
+    #
+    create_table :materialized_permissions, :id => false do |t|
+      t.string :user_uuid
+      t.string :target_uuid
+      t.integer :perm_level
+      t.boolean :traverse_owned
+    end
+    add_index :materialized_permissions, [:user_uuid, :target_uuid], unique: true, name: 'permission_user_target'
+    add_index :materialized_permissions, [:target_uuid], unique: false, name: 'permission_target'
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function should_traverse_owned (starting_uuid varchar(27),
+                                                  starting_perm integer)
+  returns bool
+IMMUTABLE
+language SQL
+as $$
+/* Helper function.  Determines if permission on an object implies
+   transitive permission to things the object owns.  This is always
+   true for groups, but only true for users when the permission level
+   is can_manage.
+*/
+select starting_uuid like '_____-j7d0g-_______________' or
+       (starting_uuid like '_____-tpzed-_______________' and starting_perm >= 3);
+$$;
+}
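+
+    # Illustrative examples (made-up uuids, not executed here):
+    #   should_traverse_owned('zzzzz-j7d0g-xxxxxxxxxxxxxxx', 1)  => true   (groups always propagate)
+    #   should_traverse_owned('zzzzz-tpzed-xxxxxxxxxxxxxxx', 1)  => false  (can_read on a user does not)
+    #   should_traverse_owned('zzzzz-tpzed-xxxxxxxxxxxxxxx', 3)  => true   (can_manage on a user does)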
+
+    # Merge all permission relationships into a single view.  This
+    # consists of: groups owned by users and projects, users owned
+    # by other users, users' permission on themselves, and explicit
+    # permission links.
+    #
+    # A SQL view gets inlined into the query where it is used as a
+    # subquery.  This enables the query planner to inject constraints,
+    # so it only has to look up the edges it plans to traverse,
+    # avoiding a brute-force query of all edges.
+    ActiveRecord::Base.connection.execute %{
+create view permission_graph_edges as
+  select groups.owner_uuid as tail_uuid, groups.uuid as head_uuid,
+         (3) as val, groups.uuid as edge_id from groups
+union all
+  select users.owner_uuid as tail_uuid, users.uuid as head_uuid,
+         (3) as val, users.uuid as edge_id from users
+union all
+  select users.uuid as tail_uuid, users.uuid as head_uuid,
+         (3) as val, '' as edge_id from users
+union all
+  select links.tail_uuid,
+         links.head_uuid,
+         CASE
+           WHEN links.name = 'can_read'   THEN 1
+           WHEN links.name = 'can_login'  THEN 1
+           WHEN links.name = 'can_write'  THEN 2
+           WHEN links.name = 'can_manage' THEN 3
+           ELSE 0
+         END as val,
+         links.uuid as edge_id
+      from links
+      where links.link_class='permission'
+}
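+
+    # Illustrative rows produced by this view, as
+    # (tail_uuid, head_uuid, val, edge_id):
+    #   a project owned by a user:     (user_uuid, project_uuid, 3, project_uuid)
+    #   a user's permission on itself: (user_uuid, user_uuid, 3, '')
+    #   a can_read permission link:    (link.tail_uuid, link.head_uuid, 1, link.uuid)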
+
+    # This is used to ensure that the permission edge passed into
+    # compute_permission_subgraph replaces the existing edge in
+    # the "edges" view that is about to be removed.
+    edge_perm = %{
+case (edges.edge_id = perm_edge_id)
+                               when true then starting_perm
+                               else edges.val
+                            end
+}
+
+    # The primary function to compute permissions for a subgraph.
+    # Comments on how it works are inline.
+    #
+    # Because of performance issues caused by the query optimizer not
+    # working across function and "with" expression boundaries, I
+    # had to fall back on using string templates for repeated code
+    # in order to inline it.
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function compute_permission_subgraph (perm_origin_uuid varchar(27),
+                                                        starting_uuid varchar(27),
+                                                        starting_perm integer,
+                                                        perm_edge_id varchar(27))
+returns table (user_uuid varchar(27), target_uuid varchar(27), val integer, traverse_owned bool)
+STABLE
+language SQL
+as $$
+
+/* The purpose of this function is to compute the permissions for a
+   subgraph of the database, starting from a given edge.  The newly
+   computed permissions are used to add and remove rows from the main
+   permissions table.
+
+   perm_origin_uuid: The object that 'gets' the permission.
+
+   starting_uuid: The starting object the permission applies to.
+
+   starting_perm: The permission that perm_origin_uuid 'has' on
+                  starting_uuid.  One of 1, 2, 3 for can_read,
+                  can_write, can_manage respectively, or 0 to revoke
+                  permissions.
+
+   perm_edge_id: Identifies the permission edge that is being updated.
+                 For changes of ownership, this is starting_uuid.
+                 For links, this is the uuid of the link object.
+                 This is used to override the edge value in the database
+                 with starting_perm.  This is necessary when revoking
+                 permissions because the update happens before the edge is
+                 actually removed.
+*/
+with
+  /* Starting from starting_uuid, determine the set of objects that
+     could be affected by this permission change.
+
+     Note: We don't traverse users unless it is an "identity"
+     permission (permission origin is self).
+  */
+  perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+             values (perm_origin_uuid, starting_uuid, starting_perm,
+                    should_traverse_owned(starting_uuid, starting_perm),
+                    (perm_origin_uuid = starting_uuid or starting_uuid not like '_____-tpzed-_______________'))
+},
+:edge_perm => edge_perm
+} }),
+
+  /* Find other inbound edges that grant permissions to 'targets' in
+     perm_from_start, and compute permissions that originate from
+     those.
+
+     This is necessary for two reasons:
+
+       1) Other users may have access to a subset of the objects
+       through other permission links than the one we started from.
+       If we don't recompute them, their permission will get dropped.
+
+       2) There may be more than one path through which a user gets
+       permission to an object.  For example, a user owns a project
+       and also shares it can_read with a group the user belongs
+       to. adding the can_read link must not overwrite the existing
+       can_manage permission granted by ownership.
+  */
+  additional_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+    select edges.tail_uuid as origin_uuid, edges.head_uuid as target_uuid, edges.val,
+           should_traverse_owned(edges.head_uuid, edges.val),
+           edges.head_uuid like '_____-j7d0g-_______________'
+      from permission_graph_edges as edges
+      where edges.edge_id != perm_edge_id and
+            edges.tail_uuid not in (select target_uuid from perm_from_start where target_uuid like '_____-j7d0g-_______________') and
+            edges.head_uuid in (select target_uuid from perm_from_start)
+},
+:edge_perm => edge_perm
+} }),
+
+  /* Combine the permissions computed in the first two phases. */
+  all_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+      select * from perm_from_start
+    union all
+      select * from additional_perms
+  )
+
+  /* The actual query that produces rows to be added or removed
+     from the materialized_permissions table.  This is the clever
+     bit.
+
+     Key insights:
+
+     * For every group, the materialized_permissions table lists all
+       users that can access that group.
+
+     * The all_perms subquery has computed permissions on a set of
+       objects for all inbound "origins", which are users or groups.
+
+     * Permissions through groups are transitive.
+
+     We can infer:
+
+     1) The materialized_permissions table declares that user X has permission N on group Y
+     2) The all_perms result has determined group Y has permission M on object Z
+     3) Therefore, user X has permission min(N, M) on object Z
+
+     This allows us to efficiently determine the set of users that
+     have permissions on the subset of objects, without having to
+     follow the chain of permission back up to find those users.
+
+     In addition, because users always have permission on themselves, this
+     query also makes sure those permission rows are always
+     returned.
+  */
+  select v.user_uuid, v.target_uuid, max(v.perm_level), bool_or(v.traverse_owned) from
+    (select m.user_uuid,
+         u.target_uuid,
+         least(u.val, m.perm_level) as perm_level,
+         u.traverse_owned
+      from all_perms as u, materialized_permissions as m
+           where u.perm_origin_uuid = m.target_uuid AND m.traverse_owned
+           AND (m.user_uuid = m.target_uuid or m.target_uuid not like '_____-tpzed-_______________')
+    union all
+      select target_uuid as user_uuid, target_uuid, 3, true
+        from all_perms
+        where all_perms.target_uuid like '_____-tpzed-_______________') as v
+    group by v.user_uuid, v.target_uuid
+$$;
+}
+
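+    # Worked example of the key insight above (hypothetical values): if
+    # materialized_permissions says user X has can_write (2) on group Y
+    # with traverse_owned, and the newly computed subgraph says Y has
+    # can_manage (3) on object Z, the final select emits
+    # (X, Z, least(2, 3)), i.e. X keeps can_write on Z.
+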
+    #
+    # Populate materialized_permissions by traversing permissions
+    # starting at each user.
+    #
+    refresh_permissions
+  end
+
+  def down
+    drop_table :materialized_permissions
+    drop_table :trashed_groups
+
+    ActiveRecord::Base.connection.execute "DROP function project_subtree_with_trash_at (varchar, timestamp);"
+    ActiveRecord::Base.connection.execute "DROP function compute_permission_subgraph (varchar, varchar, integer, varchar);"
+    ActiveRecord::Base.connection.execute "DROP function should_traverse_owned(varchar, integer);"
+    ActiveRecord::Base.connection.execute "DROP view permission_graph_edges;"
+
+    ActiveRecord::Base.connection.execute(%{
+CREATE MATERIALIZED VIEW materialized_permission_view AS
+ WITH RECURSIVE perm_value(name, val) AS (
+         VALUES ('can_read'::text,(1)::smallint), ('can_login'::text,1), ('can_write'::text,2), ('can_manage'::text,3)
+        ), perm_edges(tail_uuid, head_uuid, val, follow, trashed) AS (
+         SELECT links.tail_uuid,
+            links.head_uuid,
+            pv.val,
+            ((pv.val = 3) OR (groups.uuid IS NOT NULL)) AS follow,
+            (0)::smallint AS trashed,
+            (0)::smallint AS followtrash
+           FROM ((public.links
+             LEFT JOIN perm_value pv ON ((pv.name = (links.name)::text)))
+             LEFT JOIN public.groups ON (((pv.val < 3) AND ((groups.uuid)::text = (links.head_uuid)::text))))
+          WHERE ((links.link_class)::text = 'permission'::text)
+        UNION ALL
+         SELECT groups.owner_uuid,
+            groups.uuid,
+            3,
+            true AS bool,
+                CASE
+                    WHEN ((groups.trash_at IS NOT NULL) AND (groups.trash_at < clock_timestamp())) THEN 1
+                    ELSE 0
+                END AS "case",
+            1
+           FROM public.groups
+        ), perm(val, follow, user_uuid, target_uuid, trashed) AS (
+         SELECT (3)::smallint AS val,
+            true AS follow,
+            (users.uuid)::character varying(32) AS user_uuid,
+            (users.uuid)::character varying(32) AS target_uuid,
+            (0)::smallint AS trashed
+           FROM public.users
+        UNION
+         SELECT (LEAST((perm_1.val)::integer, edges.val))::smallint AS val,
+            edges.follow,
+            perm_1.user_uuid,
+            (edges.head_uuid)::character varying(32) AS target_uuid,
+            ((GREATEST((perm_1.trashed)::integer, edges.trashed) * edges.followtrash))::smallint AS trashed
+           FROM (perm perm_1
+             JOIN perm_edges edges ON ((perm_1.follow AND ((edges.tail_uuid)::text = (perm_1.target_uuid)::text))))
+        )
+ SELECT perm.user_uuid,
+    perm.target_uuid,
+    max(perm.val) AS perm_level,
+        CASE perm.follow
+            WHEN true THEN perm.target_uuid
+            ELSE NULL::character varying
+        END AS target_owner_uuid,
+    max(perm.trashed) AS trashed
+   FROM perm
+  GROUP BY perm.user_uuid, perm.target_uuid,
+        CASE perm.follow
+            WHEN true THEN perm.target_uuid
+            ELSE NULL::character varying
+        END
+  WITH NO DATA;
+}
+    )
+
+    add_index :materialized_permission_view, [:trashed, :target_uuid], name: 'permission_target_trashed'
+    add_index :materialized_permission_view, [:user_uuid, :trashed, :perm_level], name: 'permission_target_user_trashed_level'
+    create_table :permission_refresh_lock
+
+    ActiveRecord::Base.connection.execute 'REFRESH MATERIALIZED VIEW materialized_permission_view;'
+  end
+end
index 88cd0baa2f7bab44be54080e9d9b6a732d210d16..469475ad9604232fbeafc76889c4665b6cd77b80 100644 (file)
@@ -38,6 +38,216 @@ CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
 -- COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams';
 
 
+--
+-- Name: compute_permission_subgraph(character varying, character varying, integer, character varying); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.compute_permission_subgraph(perm_origin_uuid character varying, starting_uuid character varying, starting_perm integer, perm_edge_id character varying) RETURNS TABLE(user_uuid character varying, target_uuid character varying, val integer, traverse_owned boolean)
+    LANGUAGE sql STABLE
+    AS $$
+
+/* The purpose of this function is to compute the permissions for a
+   subgraph of the database, starting from a given edge.  The newly
+   computed permissions are used to add and remove rows from the main
+   permissions table.
+
+   perm_origin_uuid: The object that 'gets' the permission.
+
+   starting_uuid: The starting object the permission applies to.
+
+   starting_perm: The permission that perm_origin_uuid 'has' on
+                  starting_uuid.  One of 1, 2, 3 for can_read,
+                  can_write, can_manage respectively, or 0 to revoke
+                  permissions.
+
+   perm_edge_id: Identifies the permission edge that is being updated.
+                 For changes of ownership, this is starting_uuid.
+                 For links, this is the uuid of the link object.
+                 This is used to override the edge value in the database
+                 with starting_perm.  This is necessary when revoking
+                 permissions because the update happens before the edge is
+                 actually removed.
+*/
+with
+  /* Starting from starting_uuid, determine the set of objects that
+     could be affected by this permission change.
+
+     Note: We don't traverse users unless it is an "identity"
+     permission (permission origin is self).
+  */
+  perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    
+WITH RECURSIVE
+        traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
+            
+             values (perm_origin_uuid, starting_uuid, starting_perm,
+                    should_traverse_owned(starting_uuid, starting_perm),
+                    (perm_origin_uuid = starting_uuid or starting_uuid not like '_____-tpzed-_______________'))
+
+          union
+            (select traverse_graph.origin_uuid,
+                    edges.head_uuid,
+                      least(
+case (edges.edge_id = perm_edge_id)
+                               when true then starting_perm
+                               else edges.val
+                            end
+,
+                            traverse_graph.val),
+                    should_traverse_owned(edges.head_uuid, edges.val),
+                    false
+             from permission_graph_edges as edges, traverse_graph
+             where traverse_graph.target_uuid = edges.tail_uuid
+             and (edges.tail_uuid like '_____-j7d0g-_______________' or
+                  traverse_graph.starting_set)))
+        select traverse_graph.origin_uuid, target_uuid, max(val) as val, bool_or(traverse_owned) as traverse_owned from traverse_graph
+        group by (traverse_graph.origin_uuid, target_uuid)
+),
+
+  /* Find other inbound edges that grant permissions to 'targets' in
+     perm_from_start, and compute permissions that originate from
+     those.
+
+     This is necessary for two reasons:
+
+       1) Other users may have access to a subset of the objects
+       through other permission links than the one we started from.
+       If we don't recompute them, their permission will get dropped.
+
+       2) There may be more than one path through which a user gets
+       permission to an object.  For example, a user owns a project
+       and also shares it (can_read) with a group the user belongs
+       to.  Adding the can_read link must not overwrite the existing
+       can_manage permission granted by ownership.
+  */
+  additional_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    
+WITH RECURSIVE
+        traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
+            
+    select edges.tail_uuid as origin_uuid, edges.head_uuid as target_uuid, edges.val,
+           should_traverse_owned(edges.head_uuid, edges.val),
+           edges.head_uuid like '_____-j7d0g-_______________'
+      from permission_graph_edges as edges
+      where edges.edge_id != perm_edge_id and
+            edges.tail_uuid not in (select target_uuid from perm_from_start where target_uuid like '_____-j7d0g-_______________') and
+            edges.head_uuid in (select target_uuid from perm_from_start)
+
+          union
+            (select traverse_graph.origin_uuid,
+                    edges.head_uuid,
+                      least(
+case (edges.edge_id = perm_edge_id)
+                               when true then starting_perm
+                               else edges.val
+                            end
+,
+                            traverse_graph.val),
+                    should_traverse_owned(edges.head_uuid, edges.val),
+                    false
+             from permission_graph_edges as edges, traverse_graph
+             where traverse_graph.target_uuid = edges.tail_uuid
+             and (edges.tail_uuid like '_____-j7d0g-_______________' or
+                  traverse_graph.starting_set)))
+        select traverse_graph.origin_uuid, target_uuid, max(val) as val, bool_or(traverse_owned) as traverse_owned from traverse_graph
+        group by (traverse_graph.origin_uuid, target_uuid)
+),
+
+  /* Combine the permissions computed in the first two phases. */
+  all_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+      select * from perm_from_start
+    union all
+      select * from additional_perms
+  )
+
+  /* The actual query that produces rows to be added or removed
+     from the materialized_permissions table.  This is the clever
+     bit.
+
+     Key insights:
+
+     * For every group, the materialized_permissions table lists all
+       users that can access that group.
+
+     * The all_perms subquery has computed permissions on a set of
+       objects for all inbound "origins", which are users or groups.
+
+     * Permissions through groups are transitive.
+
+     We can infer:
+
+     1) The materialized_permissions table declares that user X has permission N on group Y
+     2) The all_perms result has determined group Y has permission M on object Z
+     3) Therefore, user X has permission min(N, M) on object Z
+
+     This allows us to efficiently determine the set of users that
+     have permissions on the subset of objects, without having to
+     follow the chain of permission back up to find those users.
+
+     In addition, because users always have permission on themselves, this
+     query also makes sure those permission rows are always
+     returned.
+  */
+  select v.user_uuid, v.target_uuid, max(v.perm_level), bool_or(v.traverse_owned) from
+    (select m.user_uuid,
+         u.target_uuid,
+         least(u.val, m.perm_level) as perm_level,
+         u.traverse_owned
+      from all_perms as u, materialized_permissions as m
+           where u.perm_origin_uuid = m.target_uuid AND m.traverse_owned
+           AND (m.user_uuid = m.target_uuid or m.target_uuid not like '_____-tpzed-_______________')
+    union all
+      select target_uuid as user_uuid, target_uuid, 3, true
+        from all_perms
+        where all_perms.target_uuid like '_____-tpzed-_______________') as v
+    group by v.user_uuid, v.target_uuid
+$$;
+
+
+--
+-- Name: project_subtree_with_trash_at(character varying, timestamp without time zone); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.project_subtree_with_trash_at(starting_uuid character varying, starting_trash_at timestamp without time zone) RETURNS TABLE(target_uuid character varying, trash_at timestamp without time zone)
+    LANGUAGE sql STABLE
+    AS $$
+/* Starting from a project, recursively traverse all the projects
+  underneath it and return a set of project uuids and trash_at times
+  (may be null).  The initial trash_at can be a timestamp or null.
+  The trash_at time propagates downward to groups it owns, i.e. when a
+  group is trashed, everything underneath it in the ownership
+  hierarchy is also considered trashed.  However, this fact is
+  recorded in the trashed_groups table, not by updating the trash_at
+  field in the groups table.
+*/
+WITH RECURSIVE
+        project_subtree(uuid, trash_at) as (
+        values (starting_uuid, starting_trash_at)
+        union
+        select groups.uuid, LEAST(project_subtree.trash_at, groups.trash_at)
+          from groups join project_subtree on (groups.owner_uuid = project_subtree.uuid)
+        )
+        select uuid, trash_at from project_subtree;
+$$;
+
+
+--
+-- Name: should_traverse_owned(character varying, integer); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.should_traverse_owned(starting_uuid character varying, starting_perm integer) RETURNS boolean
+    LANGUAGE sql IMMUTABLE
+    AS $$
+/* Helper function.  Determines if permission on an object implies
+   transitive permission to things the object owns.  This is always
+   true for groups, but only true for users when the permission level
+   is can_manage.
+*/
+select starting_uuid like '_____-j7d0g-_______________' or
+       (starting_uuid like '_____-tpzed-_______________' and starting_perm >= 3);
+$$;
+
+
 SET default_tablespace = '';
 
 SET default_with_oids = false;
@@ -719,93 +929,17 @@ ALTER SEQUENCE public.logs_id_seq OWNED BY public.logs.id;
 
 
 --
--- Name: users; Type: TABLE; Schema: public; Owner: -
+-- Name: materialized_permissions; Type: TABLE; Schema: public; Owner: -
 --
 
-CREATE TABLE public.users (
-    id integer NOT NULL,
-    uuid character varying(255),
-    owner_uuid character varying(255) NOT NULL,
-    created_at timestamp without time zone NOT NULL,
-    modified_by_client_uuid character varying(255),
-    modified_by_user_uuid character varying(255),
-    modified_at timestamp without time zone,
-    email character varying(255),
-    first_name character varying(255),
-    last_name character varying(255),
-    identity_url character varying(255),
-    is_admin boolean,
-    prefs text,
-    updated_at timestamp without time zone NOT NULL,
-    default_owner_uuid character varying(255),
-    is_active boolean DEFAULT false,
-    username character varying(255),
-    redirect_to_user_uuid character varying
+CREATE TABLE public.materialized_permissions (
+    user_uuid character varying,
+    target_uuid character varying,
+    perm_level integer,
+    traverse_owned boolean
 );
 
 
---
--- Name: materialized_permission_view; Type: MATERIALIZED VIEW; Schema: public; Owner: -
---
-
-CREATE MATERIALIZED VIEW public.materialized_permission_view AS
- WITH RECURSIVE perm_value(name, val) AS (
-         VALUES ('can_read'::text,(1)::smallint), ('can_login'::text,1), ('can_write'::text,2), ('can_manage'::text,3)
-        ), perm_edges(tail_uuid, head_uuid, val, follow, trashed) AS (
-         SELECT links.tail_uuid,
-            links.head_uuid,
-            pv.val,
-            ((pv.val = 3) OR (groups.uuid IS NOT NULL)) AS follow,
-            (0)::smallint AS trashed,
-            (0)::smallint AS followtrash
-           FROM ((public.links
-             LEFT JOIN perm_value pv ON ((pv.name = (links.name)::text)))
-             LEFT JOIN public.groups ON (((pv.val < 3) AND ((groups.uuid)::text = (links.head_uuid)::text))))
-          WHERE ((links.link_class)::text = 'permission'::text)
-        UNION ALL
-         SELECT groups.owner_uuid,
-            groups.uuid,
-            3,
-            true AS bool,
-                CASE
-                    WHEN ((groups.trash_at IS NOT NULL) AND (groups.trash_at < clock_timestamp())) THEN 1
-                    ELSE 0
-                END AS "case",
-            1
-           FROM public.groups
-        ), perm(val, follow, user_uuid, target_uuid, trashed) AS (
-         SELECT (3)::smallint AS val,
-            true AS follow,
-            (users.uuid)::character varying(32) AS user_uuid,
-            (users.uuid)::character varying(32) AS target_uuid,
-            (0)::smallint AS trashed
-           FROM public.users
-        UNION
-         SELECT (LEAST((perm_1.val)::integer, edges.val))::smallint AS val,
-            edges.follow,
-            perm_1.user_uuid,
-            (edges.head_uuid)::character varying(32) AS target_uuid,
-            ((GREATEST((perm_1.trashed)::integer, edges.trashed) * edges.followtrash))::smallint AS trashed
-           FROM (perm perm_1
-             JOIN perm_edges edges ON ((perm_1.follow AND ((edges.tail_uuid)::text = (perm_1.target_uuid)::text))))
-        )
- SELECT perm.user_uuid,
-    perm.target_uuid,
-    max(perm.val) AS perm_level,
-        CASE perm.follow
-            WHEN true THEN perm.target_uuid
-            ELSE NULL::character varying
-        END AS target_owner_uuid,
-    max(perm.trashed) AS trashed
-   FROM perm
-  GROUP BY perm.user_uuid, perm.target_uuid,
-        CASE perm.follow
-            WHEN true THEN perm.target_uuid
-            ELSE NULL::character varying
-        END
-  WITH NO DATA;
-
-
 --
 -- Name: nodes; Type: TABLE; Schema: public; Owner: -
 --
@@ -851,31 +985,66 @@ ALTER SEQUENCE public.nodes_id_seq OWNED BY public.nodes.id;
 
 
 --
--- Name: permission_refresh_lock; Type: TABLE; Schema: public; Owner: -
+-- Name: users; Type: TABLE; Schema: public; Owner: -
 --
 
-CREATE TABLE public.permission_refresh_lock (
-    id integer NOT NULL
+CREATE TABLE public.users (
+    id integer NOT NULL,
+    uuid character varying(255),
+    owner_uuid character varying(255) NOT NULL,
+    created_at timestamp without time zone NOT NULL,
+    modified_by_client_uuid character varying(255),
+    modified_by_user_uuid character varying(255),
+    modified_at timestamp without time zone,
+    email character varying(255),
+    first_name character varying(255),
+    last_name character varying(255),
+    identity_url character varying(255),
+    is_admin boolean,
+    prefs text,
+    updated_at timestamp without time zone NOT NULL,
+    default_owner_uuid character varying(255),
+    is_active boolean DEFAULT false,
+    username character varying(255),
+    redirect_to_user_uuid character varying
 );
 
 
 --
--- Name: permission_refresh_lock_id_seq; Type: SEQUENCE; Schema: public; Owner: -
---
-
-CREATE SEQUENCE public.permission_refresh_lock_id_seq
-    START WITH 1
-    INCREMENT BY 1
-    NO MINVALUE
-    NO MAXVALUE
-    CACHE 1;
-
-
---
--- Name: permission_refresh_lock_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
---
-
-ALTER SEQUENCE public.permission_refresh_lock_id_seq OWNED BY public.permission_refresh_lock.id;
+-- Name: permission_graph_edges; Type: VIEW; Schema: public; Owner: -
+--
+
+CREATE VIEW public.permission_graph_edges AS
+ SELECT groups.owner_uuid AS tail_uuid,
+    groups.uuid AS head_uuid,
+    3 AS val,
+    groups.uuid AS edge_id
+   FROM public.groups
+UNION ALL
+ SELECT users.owner_uuid AS tail_uuid,
+    users.uuid AS head_uuid,
+    3 AS val,
+    users.uuid AS edge_id
+   FROM public.users
+UNION ALL
+ SELECT users.uuid AS tail_uuid,
+    users.uuid AS head_uuid,
+    3 AS val,
+    ''::character varying AS edge_id
+   FROM public.users
+UNION ALL
+ SELECT links.tail_uuid,
+    links.head_uuid,
+        CASE
+            WHEN ((links.name)::text = 'can_read'::text) THEN 1
+            WHEN ((links.name)::text = 'can_login'::text) THEN 1
+            WHEN ((links.name)::text = 'can_write'::text) THEN 2
+            WHEN ((links.name)::text = 'can_manage'::text) THEN 3
+            ELSE 0
+        END AS val,
+    links.uuid AS edge_id
+   FROM public.links
+  WHERE ((links.link_class)::text = 'permission'::text);
 
 
 --
@@ -1079,6 +1248,16 @@ CREATE SEQUENCE public.traits_id_seq
 ALTER SEQUENCE public.traits_id_seq OWNED BY public.traits.id;
 
 
+--
+-- Name: trashed_groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.trashed_groups (
+    group_uuid character varying,
+    trash_at timestamp without time zone
+);
+
+
 --
 -- Name: users_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -1277,13 +1456,6 @@ ALTER TABLE ONLY public.logs ALTER COLUMN id SET DEFAULT nextval('public.logs_id
 ALTER TABLE ONLY public.nodes ALTER COLUMN id SET DEFAULT nextval('public.nodes_id_seq'::regclass);
 
 
---
--- Name: permission_refresh_lock id; Type: DEFAULT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.permission_refresh_lock ALTER COLUMN id SET DEFAULT nextval('public.permission_refresh_lock_id_seq'::regclass);
-
-
 --
 -- Name: pipeline_instances id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -1468,14 +1640,6 @@ ALTER TABLE ONLY public.nodes
     ADD CONSTRAINT nodes_pkey PRIMARY KEY (id);
 
 
---
--- Name: permission_refresh_lock permission_refresh_lock_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.permission_refresh_lock
-    ADD CONSTRAINT permission_refresh_lock_pkey PRIMARY KEY (id);
-
-
 --
 -- Name: pipeline_instances pipeline_instances_pkey; Type: CONSTRAINT; Schema: public; Owner: -
 --
@@ -2513,6 +2677,13 @@ CREATE INDEX index_traits_on_owner_uuid ON public.traits USING btree (owner_uuid
 CREATE UNIQUE INDEX index_traits_on_uuid ON public.traits USING btree (uuid);
 
 
+--
+-- Name: index_trashed_groups_on_group_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE UNIQUE INDEX index_trashed_groups_on_group_uuid ON public.trashed_groups USING btree (group_uuid);
+
+
 --
 -- Name: index_users_on_created_at; Type: INDEX; Schema: public; Owner: -
 --
@@ -2703,17 +2874,17 @@ CREATE INDEX nodes_search_index ON public.nodes USING btree (uuid, owner_uuid, m
 
 
 --
--- Name: permission_target_trashed; Type: INDEX; Schema: public; Owner: -
+-- Name: permission_target; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX permission_target_trashed ON public.materialized_permission_view USING btree (trashed, target_uuid);
+CREATE INDEX permission_target ON public.materialized_permissions USING btree (target_uuid);
 
 
 --
--- Name: permission_target_user_trashed_level; Type: INDEX; Schema: public; Owner: -
+-- Name: permission_user_target; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX permission_target_user_trashed_level ON public.materialized_permission_view USING btree (user_uuid, trashed, perm_level);
+CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid);
 
 
 --
@@ -3024,6 +3195,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20190523180148'),
 ('20190808145904'),
 ('20190809135453'),
-('20190905151603');
+('20190905151603'),
+('20200501150153');
 
 
diff --git a/services/api/lib/20200501150153_permission_table_constants.rb b/services/api/lib/20200501150153_permission_table_constants.rb
new file mode 100644 (file)
index 0000000..6e43a62
--- /dev/null
@@ -0,0 +1,85 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# These constants are used in both
+# db/migrate/20200501150153_permission_table and update_permissions
+#
+# This file allows them to be easily imported by both to avoid duplication.
+#
+# Don't mess with this!  Any changes will affect both the current
+# update_permissions and the past migration.  If you are tinkering
+# with the permission system and need to change how
+# PERM_QUERY_TEMPLATE, refresh_trashed, or refresh_permissions work,
+# you should make a new file with your modified functions and have
+# update_permissions reference that file instead.
+
+PERMISSION_VIEW = "materialized_permissions"
+
+TRASHED_GROUPS = "trashed_groups"
+
+# We need to use this parameterized query in a few different places,
+# including as a subquery in a larger query.
+#
+# There are basically two options.  The way I did this originally was
+# to put this in a Postgres function and do a lateral join over it.
+# However, postgres functions impose an optimization barrier, and
+# possibly have other overhead with temporary tables, so I ended up
+# going with the brute force approach of inlining the whole thing.
+#
+# The two substitutions are "base_case", which determines the initial
+# set of permission origins, and "edge_perm", which is used to ensure
+# that the new permission takes precedence over the one in the edges
+# table (but some queries don't need that).
+#
+PERM_QUERY_TEMPLATE = %{
+WITH RECURSIVE
+        traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
+            %{base_case}
+          union
+            (select traverse_graph.origin_uuid,
+                    edges.head_uuid,
+                      least(%{edge_perm},
+                            traverse_graph.val),
+                    should_traverse_owned(edges.head_uuid, edges.val),
+                    false
+             from permission_graph_edges as edges, traverse_graph
+             where traverse_graph.target_uuid = edges.tail_uuid
+             and (edges.tail_uuid like '_____-j7d0g-_______________' or
+                  traverse_graph.starting_set)))
+        select traverse_graph.origin_uuid, target_uuid, max(val) as val, bool_or(traverse_owned) as traverse_owned from traverse_graph
+        group by (traverse_graph.origin_uuid, target_uuid)
+}
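+
+# For illustration, a full recompute (as in refresh_permissions below)
+# instantiates the template with every user as its own permission
+# origin and no edge override:
+#   PERM_QUERY_TEMPLATE % {:base_case => "select uuid, uuid, 3, true, true from users",
+#                          :edge_perm => 'edges.val'}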
+
+def refresh_trashed
+  ActiveRecord::Base.transaction do
+    ActiveRecord::Base.connection.execute("LOCK TABLE #{TRASHED_GROUPS}")
+    ActiveRecord::Base.connection.execute("DELETE FROM #{TRASHED_GROUPS}")
+
+    # Populate the trashed_groups table.  This starts with each group
+    # owned by a user and computes the subtree under that group to
+    # find any groups that are trashed.
+    ActiveRecord::Base.connection.execute(%{
+INSERT INTO #{TRASHED_GROUPS}
+select ps.target_uuid as group_uuid, ps.trash_at from groups,
+  lateral project_subtree_with_trash_at(groups.uuid, groups.trash_at) ps
+  where groups.owner_uuid like '_____-tpzed-_______________'
+})
+  end
+end
+
+def refresh_permissions
+  ActiveRecord::Base.transaction do
+    ActiveRecord::Base.connection.execute("LOCK TABLE #{PERMISSION_VIEW}")
+    ActiveRecord::Base.connection.execute("DELETE FROM #{PERMISSION_VIEW}")
+
+    ActiveRecord::Base.connection.execute %{
+INSERT INTO materialized_permissions
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+        select uuid, uuid, 3, true, true from users
+},
+:edge_perm => 'edges.val'
+} }
+}, "refresh_permission_view.do"
+  end
+end
diff --git a/services/api/lib/refresh_permission_view.rb b/services/api/lib/refresh_permission_view.rb
deleted file mode 100644 (file)
index 5d6081f..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-PERMISSION_VIEW = "materialized_permission_view"
-
-def do_refresh_permission_view
-  ActiveRecord::Base.transaction do
-    ActiveRecord::Base.connection.execute("LOCK TABLE permission_refresh_lock")
-    ActiveRecord::Base.connection.execute("REFRESH MATERIALIZED VIEW #{PERMISSION_VIEW}")
-  end
-end
-
-def refresh_permission_view(async=false)
-  if async and Rails.configuration.API.AsyncPermissionsUpdateInterval > 0
-    exp = Rails.configuration.API.AsyncPermissionsUpdateInterval.seconds
-    need = false
-    Rails.cache.fetch('AsyncRefreshPermissionView', expires_in: exp) do
-      need = true
-    end
-    if need
-      # Schedule a new permission update and return immediately
-      Thread.new do
-        Thread.current.abort_on_exception = false
-        begin
-          sleep(exp)
-          Rails.cache.delete('AsyncRefreshPermissionView')
-          do_refresh_permission_view
-        rescue => e
-          Rails.logger.error "Updating permission view: #{e}\n#{e.backtrace.join("\n\t")}"
-        ensure
-          ActiveRecord::Base.connection.close
-        end
-      end
-      true
-    end
-  else
-    do_refresh_permission_view
-  end
-end
diff --git a/services/api/lib/update_permissions.rb b/services/api/lib/update_permissions.rb
new file mode 100644 (file)
index 0000000..4c2e72d
--- /dev/null
@@ -0,0 +1,205 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+REVOKE_PERM = 0
+CAN_MANAGE_PERM = 3
+
+def update_permissions perm_origin_uuid, starting_uuid, perm_level, edge_id=nil
+  #
+  # Update a subset of the permission table affected by adding or
+  # removing a particular permission relationship (ownership or a
+  # permission link).
+  #
+  # perm_origin_uuid: This is the object that 'gets' the permission.
+  # It is the owner_uuid or tail_uuid.
+  #
+  # starting_uuid: The object we are computing permission for (or head_uuid)
+  #
+  # perm_level: The level of permission that perm_origin_uuid gets for starting_uuid.
+  #
+  # perm_level is a number from 0-3
+  #   can_read=1
+  #   can_write=2
+  #   can_manage=3
+  #   or call with perm_level=0 to revoke permissions
+  #
+  # For testing/debugging: the result of the incremental update is
+  # compared against a full table recompute by
+  # check_permissions_against_full_refresh (a no-op outside the test
+  # environment), which raises an error if the contents are not
+  # identical (i.e. they produce different permission results).
+
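+  # Illustrative invocations (the ownership forms mirror the User
+  # model hooks in this commit; the link form is a sketch based on the
+  # edge_id handling below):
+  #   ownership granted:  update_permissions(new_owner_uuid, object_uuid, CAN_MANAGE_PERM)
+  #   ownership revoked:  update_permissions(old_owner_uuid, object_uuid, REVOKE_PERM)
+  #   permission link:    update_permissions(link.tail_uuid, link.head_uuid, perm_value, link.uuid)
+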
+  # Theory of operation
+  #
+  # Given a change to a specific permission relationship, we recompute
+  # the set of permissions (for all users) that could possibly be
+  # affected by that relationship.  For example, if a project is
+  # shared with another user, we recompute all permissions for all
+  # projects in the hierarchy.  This returns a set of updated
+  # permissions, which we stash in a temporary table.
+  #
+  # Then, for each user_uuid/target_uuid in the updated permissions
+  # result set we insert/update a permission row in
+  # materialized_permissions, and delete any rows that exist in
+  # materialized_permissions that are not in the result set or have
+  # perm_level=0.
+  #
+  # see db/migrate/20200501150153_permission_table.rb for details on
+  # how the permissions are computed.
+
+  if edge_id.nil?
+    # For changes of ownership, edge_id is starting_uuid.  It turns
+    # out most invocations of update_permissions are for changes of
+    # ownership, so make this parameter optional to reduce
+    # clutter.
+    # For permission links, the uuid of the link object will be passed in for edge_id.
+    edge_id = starting_uuid
+  end
+
+  ActiveRecord::Base.transaction do
+
+    # "Conflicts with the ROW EXCLUSIVE, SHARE UPDATE EXCLUSIVE, SHARE
+    # ROW EXCLUSIVE, EXCLUSIVE, and ACCESS EXCLUSIVE lock modes. This
+    # mode protects a table against concurrent data changes."
+    ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in SHARE MODE"
+
+    # Workaround for
+    # BUG #15160: planner overestimates number of rows in join when there are more than 200 rows coming from CTE
+    # https://www.postgresql.org/message-id/152395805004.19366.3107109716821067806@wrigleys.postgresql.org
+    #
+    # For a crucial join in the compute_permission_subgraph() query, the
+    # planner mis-estimates the number of rows in a Common Table
+    # Expression (CTE, this is a subquery in a WITH clause) and as a
+    # result it chooses the wrong join order.  The join starts with the
+    # permissions table because it mistakenly thinks
+    # count(materialized_permissions) < count(new computed permissions)
+    # when actually it is the other way around.
+    #
+    # Because of the incorrect join order, it chooses the wrong join
+    # strategy (merge join, which works best when two tables are roughly
+    # the same size).  As a workaround, we can tell it not to use that
+    # join strategy, this causes it to pick hash join instead, which
+    # turns out to be a bit better.  However, because the join order is
+    # still wrong, we don't get the full benefit of the index.
+    #
+    # This is very unfortunate because it makes the query performance
+    # dependent on the size of the materialized_permissions table, when
+    # the goal of this design was to make permission updates scale-free
+    # and only depend on the number of permissions affected and not the
+    # total table size.  In several hours of researching I wasn't able
+    # to find a way to force the correct join order, so I'm calling it
+    # here and I have to move on.
+    #
+    # This is apparently addressed in Postgres 12, but I developed &
+    # tested this on Postgres 9.6, so in the future we should reevaluate
+    # the performance & query plan on Postgres 12.
+    #
+    # https://git.furworks.de/opensourcemirror/postgresql/commit/a314c34079cf06d05265623dd7c056f8fa9d577f
+    #
+    # Disable merge join for just this query (the setting is local to this transaction), then re-enable it.
+    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to false;"
+
+    temptable_perms = "temp_perms_#{rand(2**64).to_s(10)}"
+    ActiveRecord::Base.connection.exec_query %{
+create temporary table #{temptable_perms} on commit drop
+as select * from compute_permission_subgraph($1, $2, $3, $4)
+},
+                                             'update_permissions.select',
+                                             [[nil, perm_origin_uuid],
+                                              [nil, starting_uuid],
+                                              [nil, perm_level],
+                                              [nil, edge_id]]
+
+    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to true;"
+
+    ActiveRecord::Base.connection.exec_delete %{
+delete from #{PERMISSION_VIEW} where
+  target_uuid in (select target_uuid from #{temptable_perms}) and
+  not exists (select 1 from #{temptable_perms}
+              where target_uuid=#{PERMISSION_VIEW}.target_uuid and
+                    user_uuid=#{PERMISSION_VIEW}.user_uuid and
+                    val>0)
+},
+                                              "update_permissions.delete"
+
+    ActiveRecord::Base.connection.exec_query %{
+insert into #{PERMISSION_VIEW} (user_uuid, target_uuid, perm_level, traverse_owned)
+  select user_uuid, target_uuid, val as perm_level, traverse_owned from #{temptable_perms} where val>0
+on conflict (user_uuid, target_uuid) do update set perm_level=EXCLUDED.perm_level, traverse_owned=EXCLUDED.traverse_owned;
+},
+                                             "update_permissions.insert"
+
+    if perm_level>0
+      check_permissions_against_full_refresh
+    end
+  end
+end
+
+
+def check_permissions_against_full_refresh
+  # No-op except when running tests
+  return unless Rails.env == 'test' and !Thread.current[:no_check_permissions_against_full_refresh]
+
+  # For checking correctness of the incremental permission updates.
+  # Check contents of the current 'materialized_permissions' table
+  # against a from-scratch permission refresh.
+
+  q1 = ActiveRecord::Base.connection.exec_query %{
+select user_uuid, target_uuid, perm_level, traverse_owned from #{PERMISSION_VIEW}
+order by user_uuid, target_uuid
+}, "check_permissions_against_full_refresh.permission_table"
+
+  q2 = ActiveRecord::Base.connection.exec_query %{
+    select pq.origin_uuid as user_uuid, target_uuid, pq.val as perm_level, pq.traverse_owned from (
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+        select uuid, uuid, 3, true, true from users
+},
+:edge_perm => 'edges.val'
+} }) as pq order by origin_uuid, target_uuid
+}, "check_permissions_against_full_refresh.full_recompute"
+
+  if q1.count != q2.count
+    puts "Didn't match incremental+: #{q1.count} != full refresh-: #{q2.count}"
+  end
+
+  if q1.count > q2.count
+    q1.each_with_index do |r, i|
+      if r != q2[i]
+        puts "+#{r}\n-#{q2[i]}"
+        raise "Didn't match"
+      end
+    end
+  else
+    q2.each_with_index do |r, i|
+      if r != q1[i]
+        puts "+#{q1[i]}\n-#{r}"
+        raise "Didn't match"
+      end
+    end
+  end
+end
+
+def skip_check_permissions_against_full_refresh
+  check_perm_was = Thread.current[:no_check_permissions_against_full_refresh]
+  Thread.current[:no_check_permissions_against_full_refresh] = true
+  begin
+    yield
+  ensure
+    Thread.current[:no_check_permissions_against_full_refresh] = check_perm_was
+  end
+end
+
+# Used to account for permissions that a user gains by having
+# can_manage on another user.
+#
+# note: in theory a user could have can_manage access to a user
+# through multiple levels; that isn't handled here (it would require a
+# recursive query).  I think that's okay because users getting
+# transitive access through "can_manage" on a user is a rarely/never
+# used feature and something we probably want to deprecate and remove.
+USER_UUIDS_SUBQUERY_TEMPLATE = %{
+select target_uuid from materialized_permissions where user_uuid in (%{user})
+and target_uuid like '_____-tpzed-_______________' and traverse_owned=true and perm_level >= %{perm_level}
+}
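+
+# For illustration, User#group_permissions interpolates this template
+# with bind-parameter placeholders before embedding it as a subquery:
+#   USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$2"}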
index 92a1ced52841942b60f3898a58b5818d53b3b14f..22c999ecd752ecea18160b807f5950ee94a8a521 100644 (file)
@@ -60,6 +60,7 @@ testusergroup_admins:
   uuid: zzzzz-j7d0g-48foin4vonvc2at
   owner_uuid: zzzzz-tpzed-000000000000000
   name: Administrators of a subset of users
+  group_class: role
 
 aproject:
   uuid: zzzzz-j7d0g-v955i6s2oi1cbso
@@ -143,6 +144,7 @@ active_user_has_can_manage:
   uuid: zzzzz-j7d0g-ptt1ou6a9lxrv07
   owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
   name: Active user has can_manage
+  group_class: project
 
 # Group for testing granting permission between users who share a group.
 group_for_sharing_tests:
@@ -343,4 +345,4 @@ trashed_on_next_sweep:
   trash_at: 2001-01-01T00:00:00Z
   delete_at: 2038-03-01T00:00:00Z
   is_trashed: false
-  modified_at: 2001-01-01T00:00:00Z
\ No newline at end of file
+  modified_at: 2001-01-01T00:00:00Z
index 57633a31203f6ee0b9c8150324ce93a022f4055d..14630d9efa85615a09585082299290b71def8530 100644 (file)
@@ -418,3 +418,17 @@ double_redirects_to_active:
       organization: example.com
       role: Computational biologist
     getting_started_shown: 2015-03-26 12:34:56.789000000 Z
+
+has_can_login_permission:
+  owner_uuid: zzzzz-tpzed-000000000000000
+  uuid: zzzzz-tpzed-xabcdjxw79nv3jz
+  email: can-login-user@arvados.local
+  modified_by_client_uuid: zzzzz-ozdt8-teyxzyd8qllg11h
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  first_name: Can_login
+  last_name: User
+  identity_url: https://can-login-user.openid.local
+  is_active: true
+  is_admin: false
+  modified_at: 2015-03-26 12:34:56.789000000 Z
+  username: can-login-user
index 30ab89c7e2aa4527960e518cdf63a95bbaef4550..2b5e8d5a9d099947adbb21bec74c8508fd456d16 100644 (file)
@@ -505,9 +505,19 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
 
   ### trashed project tests ###
 
-  [:active, :admin].each do |auth|
+  #
+  # The fixture structure is:
+  #
+  # trashed_project         (zzzzz-j7d0g-trashedproject1)
+  #   trashed_subproject    (zzzzz-j7d0g-trashedproject2)
+  #   trashed_subproject3   (zzzzz-j7d0g-trashedproject3)
+  #   zzzzz-xvhdp-cr5trashedcontr
+
+  [:active,
+   :admin].each do |auth|
     # project: to query,    to untrash,    is visible, parent contents listing success
-    [[:trashed_project,     [],                 false, true],
+    [
+     [:trashed_project,     [],                 false, true],
      [:trashed_project,     [:trashed_project], true,  true],
      [:trashed_subproject,  [],                 false, false],
      [:trashed_subproject,  [:trashed_project], true,  true],
index eb97fc1f49034165e6ae02a1896b116a3e835890..445670a3d51572827289bb0dc37430168cd4eb9f 100644 (file)
@@ -193,11 +193,15 @@ class NonTransactionalGroupsTest < ActionDispatch::IntegrationTest
     assert_response :success
   end
 
-  test "create request with async=true defers permissions update" do
+  test "create request with async=true does not defer permissions update" do
     Rails.configuration.API.AsyncPermissionsUpdateInterval = 1 # second
     name = "Random group #{rand(1000)}"
     assert_equal nil, Group.find_by_name(name)
 
+    # Following the implementation of incremental permission updates
+    # (#16007), the async flag is now a no-op.  Permission changes are
+    # visible immediately.
+
     # Trigger the asynchronous permission update by using async=true parameter.
     post "/arvados/v1/groups",
       params: {
@@ -209,7 +213,7 @@ class NonTransactionalGroupsTest < ActionDispatch::IntegrationTest
       headers: auth(:active)
     assert_response 202
 
-    # The group exists on the database, but it's not accessible yet.
+    # The group exists in the database and is accessible immediately.
     assert_not_nil Group.find_by_name(name)
     get "/arvados/v1/groups",
       params: {
@@ -218,7 +222,7 @@ class NonTransactionalGroupsTest < ActionDispatch::IntegrationTest
       },
       headers: auth(:active)
     assert_response 200
-    assert_equal 0, json_response['items_available']
+    assert_equal 1, json_response['items_available']
 
     # Wait a bit and try again.
     sleep(1)
index a0605f97e72c1a749ccec12b46cd1a406417e2f0..d0e6413b16ab195b0e8fe03ba865abba0a2b3541 100644 (file)
@@ -40,7 +40,7 @@ class PermissionPerfTest < ActionDispatch::IntegrationTest
                    end
                  end
                end
-               User.invalidate_permissions_cache
+               refresh_permissions
              end
            end)
     end
index 5747a85cf598965d20b563c918a304b01f9dce87..c99a57aaff49b24910df17c4a745088a4903ce22 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+require 'update_permissions'
+
 ENV["RAILS_ENV"] = "test"
 unless ENV["NO_COVERAGE_TEST"]
   begin
@@ -207,4 +209,5 @@ class ActionDispatch::IntegrationTest
 end
 
 # Ensure permissions are computed from the test fixtures.
-User.invalidate_permissions_cache
+refresh_permissions
+refresh_trashed
index bf1ba517ebcb6bf26aec3027a084fee086ff810b..addea83062404b84baf1911d6b81a262d582ce05 100644 (file)
@@ -1000,6 +1000,19 @@ class CollectionTest < ActiveSupport::TestCase
   test "delete referring links in SweepTrashedObjects" do
     uuid = collections(:trashed_on_next_sweep).uuid
     act_as_system_user do
+      assert_raises ActiveRecord::RecordInvalid do
+        # Cannot create because :trashed_on_next_sweep is already trashed
+        Link.create!(head_uuid: uuid,
+                     tail_uuid: system_user_uuid,
+                     link_class: 'whatever',
+                     name: 'something')
+      end
+
+      # Bump trash_at to now + 1 minute
+      Collection.where(uuid: uuid).
+        update(trash_at: db_current_time + (1).minute)
+
+      # Not considered trashed now
       Link.create!(head_uuid: uuid,
                    tail_uuid: system_user_uuid,
                    link_class: 'whatever',
index 528c6d253f49b9d356a3a7c857e2117690ecd228..ca02e2db5e3a08a8fb3ecdd79222e2835f8a5e2d 100644 (file)
@@ -70,8 +70,12 @@ class OwnerTest < ActiveSupport::TestCase
              "new #{o_class} should really be in DB")
       old_uuid = o.uuid
       new_uuid = o.uuid.sub(/..........$/, rand(2**256).to_s(36)[0..9])
-      assert(o.update_attributes(uuid: new_uuid),
-             "should change #{o_class} uuid from #{old_uuid} to #{new_uuid}")
+      if o.respond_to? :update_uuid
+        o.update_uuid(new_uuid: new_uuid)
+      else
+        assert(o.update_attributes(uuid: new_uuid),
+               "should change #{o_class} uuid from #{old_uuid} to #{new_uuid}")
+      end
       assert_equal(false, o_class.where(uuid: old_uuid).any?,
                    "#{old_uuid} should disappear when renamed to #{new_uuid}")
     end
@@ -83,9 +87,11 @@ class OwnerTest < ActiveSupport::TestCase
       assert_equal(true, Specimen.where(owner_uuid: o.uuid).any?,
                    "need something to be owned by #{o.uuid} for this test")
 
-      assert_raises(ActiveRecord::DeleteRestrictionError,
-                    "should not delete #{ofixt} that owns objects") do
-        o.destroy
+      skip_check_permissions_against_full_refresh do
+        assert_raises(ActiveRecord::DeleteRestrictionError,
+                      "should not delete #{ofixt} that owns objects") do
+          o.destroy
+        end
       end
     end
 
@@ -104,9 +110,14 @@ class OwnerTest < ActiveSupport::TestCase
     assert User.where(uuid: o.uuid).any?, "new User should really be in DB"
     assert_equal(true, o.update_attributes(owner_uuid: o.uuid),
                  "setting owner to self should work")
-    assert(o.destroy, "should delete User that owns self")
+
+    skip_check_permissions_against_full_refresh do
+      assert(o.destroy, "should delete User that owns self")
+    end
+
     assert_equal(false, User.where(uuid: o.uuid).any?,
                  "#{o.uuid} should not be in DB after deleting")
+    check_permissions_against_full_refresh
   end
 
   test "change uuid of User that owns self" do
@@ -116,8 +127,8 @@ class OwnerTest < ActiveSupport::TestCase
                  "setting owner to self should work")
     old_uuid = o.uuid
     new_uuid = o.uuid.sub(/..........$/, rand(2**256).to_s(36)[0..9])
-    assert(o.update_attributes(uuid: new_uuid),
-           "should change uuid of User that owns self")
+    o.update_uuid(new_uuid: new_uuid)
+    o = User.find_by_uuid(new_uuid)
     assert_equal(false, User.where(uuid: old_uuid).any?,
                  "#{old_uuid} should not be in DB after deleting")
     assert_equal(true, User.where(uuid: new_uuid).any?,
index 18d2fbbcb5f7cef9d87cc71df1077b5a4c8cf1d6..cb5ae7ba2f2367462229927e364e640913727141 100644 (file)
@@ -10,7 +10,7 @@ class PermissionTest < ActiveSupport::TestCase
   test "Grant permissions on an object I own" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create
+    ob = Collection.create
     assert ob.save
 
     # Ensure I have permission to manage this group even when its owner changes
@@ -24,7 +24,7 @@ class PermissionTest < ActiveSupport::TestCase
   test "Delete permission links when deleting an object" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create!
+    ob = Collection.create!
     Link.create!(tail_uuid: users(:active).uuid,
                  head_uuid: ob.uuid,
                  link_class: 'permission',
@@ -37,7 +37,7 @@ class PermissionTest < ActiveSupport::TestCase
 
   test "permission links owned by root" do
     set_user_from_auth :active_trustedclient
-    ob = Specimen.create!
+    ob = Collection.create!
     perm_link = Link.create!(tail_uuid: users(:active).uuid,
                              head_uuid: ob.uuid,
                              link_class: 'permission',
@@ -48,18 +48,18 @@ class PermissionTest < ActiveSupport::TestCase
   test "readable_by" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create!
+    ob = Collection.create!
     Link.create!(tail_uuid: users(:active).uuid,
                  head_uuid: ob.uuid,
                  link_class: 'permission',
                  name: 'can_read')
-    assert Specimen.readable_by(users(:active)).where(uuid: ob.uuid).any?, "user does not have read permission"
+    assert Collection.readable_by(users(:active)).where(uuid: ob.uuid).any?, "user does not have read permission"
   end
 
   test "writable_by" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create!
+    ob = Collection.create!
     Link.create!(tail_uuid: users(:active).uuid,
                  head_uuid: ob.uuid,
                  link_class: 'permission',
@@ -67,6 +67,34 @@ class PermissionTest < ActiveSupport::TestCase
     assert ob.writable_by.include?(users(:active).uuid), "user does not have write permission"
   end
 
+  test "update permission link" do
+    set_user_from_auth :admin
+
+    grp = Group.create! name: "blah project", group_class: "project"
+    ob = Collection.create! owner_uuid: grp.uuid
+
+    assert !users(:active).can?(write: ob)
+    assert !users(:active).can?(read: ob)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_write')
+
+    assert users(:active).can?(write: ob)
+    assert users(:active).can?(read: ob)
+
+    l1.update_attributes!(name: 'can_read')
+
+    assert !users(:active).can?(write: ob)
+    assert users(:active).can?(read: ob)
+
+    l1.destroy
+
+    assert !users(:active).can?(write: ob)
+    assert !users(:active).can?(read: ob)
+  end
+
   test "writable_by reports requesting user's own uuid for a writable project" do
     invited_to_write = users(:project_viewer)
     group = groups(:asubproject)
@@ -124,16 +152,16 @@ class PermissionTest < ActiveSupport::TestCase
   test "user owns group, group can_manage object's group, user can add permissions" do
     set_user_from_auth :admin
 
-    owner_grp = Group.create!(owner_uuid: users(:active).uuid)
-
-    sp_grp = Group.create!
-    sp = Specimen.create!(owner_uuid: sp_grp.uuid)
+    owner_grp = Group.create!(owner_uuid: users(:active).uuid, group_class: "role")
+    sp_grp = Group.create!(group_class: "project")
 
     Link.create!(link_class: 'permission',
                  name: 'can_manage',
                  tail_uuid: owner_grp.uuid,
                  head_uuid: sp_grp.uuid)
 
+    sp = Collection.create!(owner_uuid: sp_grp.uuid)
+
     # active user owns owner_grp, which has can_manage permission on sp_grp
     # user should be able to add permissions on sp.
     set_user_from_auth :active_trustedclient
@@ -149,7 +177,7 @@ class PermissionTest < ActiveSupport::TestCase
   skip "can_manage permission on a non-group object" do
     set_user_from_auth :admin
 
-    ob = Specimen.create!
+    ob = Collection.create!
     # grant can_manage permission to active
     perm_link = Link.create!(tail_uuid: users(:active).uuid,
                              head_uuid: ob.uuid,
@@ -170,7 +198,7 @@ class PermissionTest < ActiveSupport::TestCase
   test "user without can_manage permission may not modify permission link" do
     set_user_from_auth :admin
 
-    ob = Specimen.create!
+    ob = Collection.create!
     # grant can_manage permission to active
     perm_link = Link.create!(tail_uuid: users(:active).uuid,
                              head_uuid: ob.uuid,
@@ -192,7 +220,8 @@ class PermissionTest < ActiveSupport::TestCase
     manager = create :active_user, first_name: "Manage", last_name: "Er"
     minion = create :active_user, first_name: "Min", last_name: "Ion"
     minions_specimen = act_as_user minion do
-      Specimen.create!
+      g = Group.create! name: "minion project", group_class: "project"
+      Collection.create! owner_uuid: g.uuid
     end
     # Manager creates a group. (Make sure it doesn't magically give
     # anyone any additional permissions.)
@@ -255,7 +284,7 @@ class PermissionTest < ActiveSupport::TestCase
         create(:permission_link,
                name: 'can_manage', tail_uuid: manager.uuid, head_uuid: minion.uuid)
       end
-      assert_empty(Specimen
+      assert_empty(Collection
                      .readable_by(manager)
                      .where(uuid: minions_specimen.uuid),
                    "manager saw the minion's private stuff")
@@ -273,7 +302,7 @@ class PermissionTest < ActiveSupport::TestCase
 
     act_as_user manager do
       # Now, manager can read and write Minion's stuff.
-      assert_not_empty(Specimen
+      assert_not_empty(Collection
                          .readable_by(manager)
                          .where(uuid: minions_specimen.uuid),
                        "manager could not find minion's specimen by uuid")
@@ -309,12 +338,12 @@ class PermissionTest < ActiveSupport::TestCase
                      "#{a.first_name} should be able to see 'b' in the user list")
 
     a_specimen = act_as_user a do
-      Specimen.create!
+      Collection.create!
     end
-    assert_not_empty(Specimen.readable_by(a).where(uuid: a_specimen.uuid),
-                     "A cannot read own Specimen, following test probably useless.")
-    assert_empty(Specimen.readable_by(b).where(uuid: a_specimen.uuid),
-                 "B can read A's Specimen")
+    assert_not_empty(Collection.readable_by(a).where(uuid: a_specimen.uuid),
+                     "A cannot read own Collection, following test probably useless.")
+    assert_empty(Collection.readable_by(b).where(uuid: a_specimen.uuid),
+                 "B can read A's Collection")
     [a,b].each do |u|
       assert_empty(User.readable_by(u).where(uuid: other.uuid),
                    "#{u.first_name} can see OTHER in the user list")
@@ -341,13 +370,13 @@ class PermissionTest < ActiveSupport::TestCase
   test "cannot create with owner = unwritable user" do
     set_user_from_auth :rominiadmin
     assert_raises ArvadosModel::PermissionDeniedError, "created with owner = unwritable user" do
-      Specimen.create!(owner_uuid: users(:active).uuid)
+      Collection.create!(owner_uuid: users(:active).uuid)
     end
   end
 
   test "cannot change owner to unwritable user" do
     set_user_from_auth :rominiadmin
-    ob = Specimen.create!
+    ob = Collection.create!
     assert_raises ArvadosModel::PermissionDeniedError, "changed owner to unwritable user" do
       ob.update_attributes!(owner_uuid: users(:active).uuid)
     end
@@ -356,13 +385,13 @@ class PermissionTest < ActiveSupport::TestCase
   test "cannot create with owner = unwritable group" do
     set_user_from_auth :rominiadmin
     assert_raises ArvadosModel::PermissionDeniedError, "created with owner = unwritable group" do
-      Specimen.create!(owner_uuid: groups(:aproject).uuid)
+      Collection.create!(owner_uuid: groups(:aproject).uuid)
     end
   end
 
   test "cannot change owner to unwritable group" do
     set_user_from_auth :rominiadmin
-    ob = Specimen.create!
+    ob = Collection.create!
     assert_raises ArvadosModel::PermissionDeniedError, "changed owner to unwritable group" do
       ob.update_attributes!(owner_uuid: groups(:aproject).uuid)
     end
@@ -390,4 +419,159 @@ class PermissionTest < ActiveSupport::TestCase
 
     assert_not_empty container_logs(:running_older, :anonymous)
   end
+
+  test "add user to group, then remove them" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "role")
+    col = Collection.create!(owner_uuid: grp.uuid)
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    l3 = Link.create!(tail_uuid: users(:project_viewer).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+    l4 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:project_viewer).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid).first
+
+    l1.destroy
+    l2.destroy
+
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+  end
+
+
+  test "add user to group, then change permission level" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "role")
+    col = Collection.create!(owner_uuid: grp.uuid)
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_manage')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+
+    l1.name = 'can_read'
+    l1.save!
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert !users(:active).can?(write: col.uuid)
+    assert !users(:active).can?(manage: col.uuid)
+
+    l1.name = 'can_write'
+    l1.save!
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert !users(:active).can?(manage: col.uuid)
+  end
+
+
+  test "add user to group, then add overlapping permission link to group" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "role")
+    col = Collection.create!(owner_uuid: grp.uuid)
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_manage')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+
+    l3 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+
+    l3.destroy!
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+  end
+
+
+  test "add user to group, then add overlapping permission link to subproject" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "project")
+    prj = Group.create!(owner_uuid: grp.uuid, group_class: "project")
+    assert_empty Group.readable_by(users(:active)).where(uuid: prj.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_manage')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Group.readable_by(users(:active)).where(uuid: prj.uuid).first
+    assert users(:active).can?(read: prj.uuid)
+    assert users(:active).can?(write: prj.uuid)
+    assert users(:active).can?(manage: prj.uuid)
+
+    l3 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: prj.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Group.readable_by(users(:active)).where(uuid: prj.uuid).first
+    assert users(:active).can?(read: prj.uuid)
+    assert users(:active).can?(write: prj.uuid)
+    assert users(:active).can?(manage: prj.uuid)
+
+    l3.destroy!
+
+    assert Group.readable_by(users(:active)).where(uuid: prj.uuid).first
+    assert users(:active).can?(read: prj.uuid)
+    assert users(:active).can?(write: prj.uuid)
+    assert users(:active).can?(manage: prj.uuid)
+  end
 end
index 260795c12f8969333044f3c8917f6fe8cd2432e8..7fcd36d7091a4c7a00a9af6ae50f10f5a413871d 100644 (file)
@@ -165,7 +165,9 @@ class UserTest < ActiveSupport::TestCase
 
       if auto_admin_first_user_config
         # This test requires no admin users exist (except for the system user)
-        users(:admin).delete
+        act_as_system_user do
+          users(:admin).update_attributes!(is_admin: false)
+        end
         @all_users = User.where("uuid not like '%-000000000000000'").where(:is_admin => true)
         assert_equal 0, @all_users.count, "No admin users should exist (except for the system user)"
       end
@@ -476,15 +478,6 @@ class UserTest < ActiveSupport::TestCase
 
     vm = VirtualMachine.create
 
-    # Set up the bogus Link
-    bad_uuid = 'zzzzz-tpzed-xyzxyzxyzxyzxyz'
-
-    resp_link = Link.create ({tail_uuid: email, link_class: 'permission',
-        name: 'can_login', head_uuid: bad_uuid})
-    resp_link.save(validate: false)
-
-    verify_link resp_link, 'permission', 'can_login', email, bad_uuid
-
     response = user.setup(repo_name: 'foo/testrepo',
                           vm_uuid: vm.uuid)
 
index 5026e2d32558e085886ba119cf0b664bfbc58473..1706473cc892c43cbd5ad27751c49f43cbebc075 100644 (file)
@@ -699,10 +699,20 @@ func (v *UnixVolume) EmptyTrash() {
        err := filepath.Walk(v.Root, func(path string, info os.FileInfo, err error) error {
                if err != nil {
                        v.logger.WithError(err).Errorf("EmptyTrash: filepath.Walk(%q) failed", path)
+                       // Don't give up -- keep walking other
+                       // files/dirs
                        return nil
+               } else if !info.Mode().IsDir() {
+                       todo <- dirent{path, info}
+                       return nil
+               } else if path == v.Root || blockDirRe.MatchString(info.Name()) {
+                       // Descend into a directory that we might have
+                       // put trash in.
+                       return nil
+               } else {
+                       // Don't descend into other dirs.
+                       return filepath.SkipDir
                }
-               todo <- dirent{path, info}
-               return nil
        })
        close(todo)
        wg.Wait()
index 5a3a536944daa5b8012bc0b2afbf8b6932862364..6b42dbc519ac933a0ddca0092fc1b14fb1b599d8 100644 (file)
@@ -424,3 +424,26 @@ func (s *UnixVolumeSuite) TestStats(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"FlockOps":2,.*`)
 }
+
+func (s *UnixVolumeSuite) TestSkipUnusedDirs(c *check.C) {
+       vol := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+
+       err := os.Mkdir(vol.UnixVolume.Root+"/aaa", 0777)
+       c.Assert(err, check.IsNil)
+       err = os.Mkdir(vol.UnixVolume.Root+"/.aaa", 0777) // EmptyTrash should not look here
+       c.Assert(err, check.IsNil)
+       deleteme := vol.UnixVolume.Root + "/aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+       err = ioutil.WriteFile(deleteme, []byte{1, 2, 3}, 0777)
+       c.Assert(err, check.IsNil)
+       skipme := vol.UnixVolume.Root + "/.aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+       err = ioutil.WriteFile(skipme, []byte{1, 2, 3}, 0777)
+       c.Assert(err, check.IsNil)
+       vol.EmptyTrash()
+
+       _, err = os.Stat(skipme)
+       c.Check(err, check.IsNil)
+
+       _, err = os.Stat(deleteme)
+       c.Check(err, check.NotNil)
+       c.Check(os.IsNotExist(err), check.Equals, true)
+}