16482: Merge branch 'master' into 16482-bump-cwltool-version
authorWard Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:52:23 +0000 (11:52 -0400)
committerWard Vandewege <ward@jhvc.com>
Thu, 18 Jun 2020 15:53:12 +0000 (11:53 -0400)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

53 files changed:
apps/workbench/Gemfile.lock
apps/workbench/app/views/layouts/application.html.erb
cmd/arvados-server/cmd.go
doc/_config.yml
doc/admin/recovering-deleted-collections.html.textile.liquid [new file with mode: 0644]
doc/install/setup-login.html.textile.liquid
lib/config/config.default.yml
lib/config/export.go
lib/config/generated_config.go
lib/config/load.go
lib/controller/handler_test.go
lib/controller/localdb/login.go
lib/controller/localdb/login_oidc.go [moved from lib/controller/localdb/login_google.go with 66% similarity]
lib/controller/localdb/login_oidc_test.go [moved from lib/controller/localdb/login_google_test.go with 65% similarity]
lib/recovercollection/cmd.go [new file with mode: 0644]
lib/recovercollection/cmd_test.go [new file with mode: 0644]
sdk/go/arvados/blob_signature.go [new file with mode: 0644]
sdk/go/arvados/blob_signature_test.go [new file with mode: 0644]
sdk/go/arvados/config.go
sdk/go/arvados/container.go
sdk/go/arvados/keep_service.go
sdk/go/keepclient/perms.go
sdk/go/keepclient/perms_test.go [deleted file]
sdk/python/setup.py
services/api/Gemfile.lock
services/api/app/controllers/database_controller.rb
services/api/app/models/arvados_model.rb
services/api/app/models/database_seeds.rb
services/api/app/models/group.rb
services/api/app/models/link.rb
services/api/app/models/materialized_permission.rb [new file with mode: 0644]
services/api/app/models/trashed_group.rb [new file with mode: 0644]
services/api/app/models/user.rb
services/api/db/migrate/20200501150153_permission_table.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/20200501150153_permission_table_constants.rb [new file with mode: 0644]
services/api/lib/refresh_permission_view.rb [deleted file]
services/api/lib/update_permissions.rb [new file with mode: 0644]
services/api/test/fixtures/groups.yml
services/api/test/fixtures/users.yml
services/api/test/functional/arvados/v1/groups_controller_test.rb
services/api/test/integration/groups_test.rb
services/api/test/performance/permission_test.rb
services/api/test/test_helper.rb
services/api/test/unit/collection_test.rb
services/api/test/unit/owner_test.rb
services/api/test/unit/permission_test.rb
services/api/test/unit/user_test.rb
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/unix_volume.go
services/keepstore/unix_volume_test.go
services/keepstore/volume_test.go

index e722fa24196d1bc9b38ced9ba1cc27102471ac88..2420fee24d07e056d3ee4b7047f43f87dd1b5d6d 100644 (file)
@@ -315,7 +315,7 @@ GEM
       json (>= 1.8.0)
     websocket-driver (0.6.5)
       websocket-extensions (>= 0.1.0)
-    websocket-extensions (0.1.3)
+    websocket-extensions (0.1.5)
     xpath (2.1.0)
       nokogiri (~> 1.3)
 
index 4fc7da9949cc7ebdaca5bf3cf24d54456ae8d5b6..c0f01da283aa3ff88168f8fb5cf6f01a56d0f92b 100644 (file)
@@ -20,7 +20,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
   <meta name="author" content="">
   <% if current_user %>
     <% content_for :js do %>
-      window.defaultSession = <%=raw({baseURL: Rails.configuration.Services.Controller.ExternalURL.to_s.gsub(/\/?$/,'/'), token: Thread.current[:arvados_api_token], user: current_user}.to_json)%>
+      window.defaultSession = <%=raw({baseURL: Rails.configuration.Services.Controller.ExternalURL.to_s.sub(/\/*$/,'/'), token: Thread.current[:arvados_api_token], user: current_user}.to_json)%>
     <% end %>
   <% end %>
   <% if current_user and $arvados_api_client.discovery[:websocketUrl] %>
@@ -49,7 +49,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
   <%= yield :head %>
   <%= javascript_tag do %>
     angular.module('Arvados').value('arvadosApiToken', '<%=Thread.current[:arvados_api_token]%>');
-    angular.module('Arvados').value('arvadosDiscoveryUri', '<%= Rails.configuration.Services.Controller.ExternalURL.to_s + '/discovery/v1/apis/arvados/v1/rest' %>');
+    angular.module('Arvados').value('arvadosDiscoveryUri', '<%= Rails.configuration.Services.Controller.ExternalURL.to_s.sub(/\/*$/,'/') + 'discovery/v1/apis/arvados/v1/rest' %>');
   <%= yield :js %>
   <% end %>
   <style>
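The two template fixes above address a Ruby regexp subtlety: when the URL already ends in "/", gsub(/\/?$/, '/') matches twice (once on the trailing slash, then once on the empty string at end-of-string) and produces a doubled slash, whereas sub(/\/*$/, '/') collapses any run of trailing slashes to exactly one. A standalone sketch of the same normalization in Go (the function name is illustrative, not part of this commit):

<pre>
package main

import (
	"fmt"
	"strings"
)

// ensureTrailingSlash mirrors the Ruby sub(/\/*$/, "/") above: any
// run of trailing slashes, including none, becomes exactly one.
func ensureTrailingSlash(u string) string {
	return strings.TrimRight(u, "/") + "/"
}

func main() {
	for _, u := range []string{"https://ctrl.example", "https://ctrl.example/", "https://ctrl.example//"} {
		fmt.Println(ensureTrailingSlash(u)) // always "https://ctrl.example/"
	}
}
</pre>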
index fcea2223da70d5a174ee74b8281ebd3d20e0b503..ff99de75c41ad13f630d0902c2e695c6c17ad5c9 100644 (file)
@@ -15,6 +15,7 @@ import (
        "git.arvados.org/arvados.git/lib/crunchrun"
        "git.arvados.org/arvados.git/lib/dispatchcloud"
        "git.arvados.org/arvados.git/lib/install"
+       "git.arvados.org/arvados.git/lib/recovercollection"
        "git.arvados.org/arvados.git/services/ws"
 )
 
@@ -24,16 +25,17 @@ var (
                "-version":  cmd.Version,
                "--version": cmd.Version,
 
-               "boot":            boot.Command,
-               "cloudtest":       cloudtest.Command,
-               "config-check":    config.CheckCommand,
-               "config-defaults": config.DumpDefaultsCommand,
-               "config-dump":     config.DumpCommand,
-               "controller":      controller.Command,
-               "crunch-run":      crunchrun.Command,
-               "dispatch-cloud":  dispatchcloud.Command,
-               "install":         install.Command,
-               "ws":              ws.Command,
+               "boot":               boot.Command,
+               "cloudtest":          cloudtest.Command,
+               "config-check":       config.CheckCommand,
+               "config-defaults":    config.DumpDefaultsCommand,
+               "config-dump":        config.DumpCommand,
+               "controller":         controller.Command,
+               "crunch-run":         crunchrun.Command,
+               "dispatch-cloud":     dispatchcloud.Command,
+               "install":            install.Command,
+               "recover-collection": recovercollection.Command,
+               "ws":                 ws.Command,
        })
 )
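Every entry in this table satisfies the same handler interface; its RunCommand signature appears in lib/recovercollection/cmd.go later in this commit. A minimal sketch of how such a name-to-handler table dispatches (the "echo" handler is hypothetical, for illustration only):

<pre>
package main

import (
	"fmt"
	"io"
	"os"
)

// Handler mirrors the RunCommand signature used by the subcommands
// registered above (see lib/recovercollection/cmd.go in this commit).
type Handler interface {
	RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int
}

type echoCommand struct{} // hypothetical subcommand

func (echoCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
	fmt.Fprintln(stdout, args)
	return 0
}

var handlers = map[string]Handler{"echo": echoCommand{}}

func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: prog command [args...]")
		os.Exit(2)
	}
	h, ok := handlers[os.Args[1]]
	if !ok {
		fmt.Fprintf(os.Stderr, "unrecognized command %q\n", os.Args[1])
		os.Exit(2)
	}
	os.Exit(h.RunCommand(os.Args[0], os.Args[2:], os.Stdin, os.Stdout, os.Stderr))
}
</pre>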
 
index 48fe1b53d49149139cf36e0b602e0f0fe4d2ab3e..3b59cbca45205983ba4b83429f06b914946a53dd 100644 (file)
@@ -174,6 +174,7 @@ navbar:
       - admin/logs-table-management.html.textile.liquid
       - admin/workbench2-vocabulary.html.textile.liquid
       - admin/storage-classes.html.textile.liquid
+      - admin/recovering-deleted-collections.html.textile.liquid
     - Cloud:
       - admin/spot-instances.html.textile.liquid
       - admin/cloudtest.html.textile.liquid
diff --git a/doc/admin/recovering-deleted-collections.html.textile.liquid b/doc/admin/recovering-deleted-collections.html.textile.liquid
new file mode 100644 (file)
index 0000000..59c576c
--- /dev/null
@@ -0,0 +1,37 @@
+---
+layout: default
+navsection: admin
+title: Recovering deleted collections
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+In some cases, it is possible to recover files that were lost when a collection was modified or deleted.
+
+Whether recovery is possible depends on many factors, including:
+* Whether the collection manifest is still available, e.g., in an audit log entry
+* Whether the data blocks are also referenced by other collections
+* Whether the data blocks have been unreferenced long enough to be marked for deletion/trash by keep-balance
+* Blob signature TTL, trash lifetime, trash check interval, and other config settings
+
+To attempt recovery of a previous version of a deleted/modified collection, use the @arvados-server recover-collection@ command. It should be run on one of your server nodes where the @arvados-server@ package is installed and the @/etc/arvados/config.yml@ file is up to date.
+
+Specify the collection you want to recover by passing either the UUID of an audit log entry or a file containing the manifest.
+
+If recovery is successful, the @recover-collection@ program saves the recovered data in a new collection belonging to the system user, and prints the new collection's UUID on stdout.
+
+<pre>
+# arvados-server recover-collection 9tee4-57u5n-nb5awmk1pahac2t
+INFO[2020-06-05T19:52:29.557761245Z] loaded log entry                              logged_event_time="2020-06-05 16:48:01.438791 +0000 UTC" logged_event_type=update old_collection_uuid=9tee4-4zz18-1ex26g95epmgw5w src=9tee4-57u5n-nb5awmk1pahac2t
+INFO[2020-06-05T19:52:29.642145127Z] recovery succeeded                            UUID=9tee4-4zz18-5trfp4k4xxg97f1 src=9tee4-57u5n-nb5awmk1pahac2t
+9tee4-4zz18-5trfp4k4xxg97f1
+INFO[2020-06-05T19:52:29.644699436Z] exiting
+</pre>
+
+In this example, the original data has been restored and saved in a new collection with UUID @9tee4-4zz18-5trfp4k4xxg97f1@.
+
+For more options, run @arvados-server recover-collection -help@.
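The command also accepts a path to a local manifest file, as the usage text in lib/recovercollection/cmd.go (later in this commit) shows; a hypothetical invocation (the path is illustrative):

<pre>
# arvados-server recover-collection /tmp/deleted-collection-manifest.txt
</pre>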
index 3fe442c75b2e48e67cc9de126abeabb011fb65c4..aec82cfe2a583dd2eaf2d251532e4d46d625ff5e 100644 (file)
@@ -12,6 +12,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 Select one of the following login mechanisms for your cluster.
 
 # If all users will authenticate with Google, "configure Google login":#google.
+# If all users will authenticate with an OpenID Connect provider (other than Google), "configure OpenID Connect":#oidc.
 # If all users will authenticate with an existing LDAP service, "configure LDAP":#ldap.
 # If all users will authenticate using PAM as configured on your controller node, "configure PAM":#pam.
 
@@ -42,6 +43,21 @@ Use the <a href="https://console.developers.google.com" target="_blank">Google D
         ClientSecret: "zzzzzzzzzzzzzzzzzzzzzzzz"
 </pre>
 
+h2(#oidc). OpenID Connect
+
+With this configuration, users will sign in with a third-party OpenID Connect provider. The provider will supply appropriate values for the issuer URL, client ID, and client secret config entries.
+
+<pre>
+    Login:
+      OpenIDConnect:
+        Enable: true
+        Issuer: https://accounts.example.com/
+        ClientID: "0123456789abcdef"
+        ClientSecret: "zzzzzzzzzzzzzzzzzzzzzzzz"
+</pre>
+
+Check the OpenIDConnect section in the "default config file":{{site.baseurl}}/admin/config.html for more details and configuration options.
+
 h2(#ldap). LDAP
 
 With this configuration, authentication uses an external LDAP service like OpenLDAP or Active Directory.
index 29418baa670666208fc9c64404ef6b8048bc0b73..b9bc9c2c5ce4a28eb25015961b687cea449d503a 100644 (file)
@@ -551,6 +551,40 @@ Clusters:
         # work. If false, only the primary email address will be used.
         AlternateEmailAddresses: true
 
+      OpenIDConnect:
+        # Authenticate with an OpenID Connect provider.
+        Enable: false
+
+        # Issuer URL, e.g., "https://login.example.com".
+        #
+        # This must be exactly equal to the URL returned by the issuer
+        # itself in its config response ("issuer" key). If the
+        # configured value is "https://example" and the provider
+        # returns "https://example:443" or "https://example/" then
+        # login will fail, even though those URLs are equivalent
+        # (RFC3986).
+        Issuer: ""
+
+        # Your client ID and client secret (supplied by the provider).
+        ClientID: ""
+        ClientSecret: ""
+
+        # OpenID claim field containing the user's email
+        # address. Normally "email"; see
+        # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims
+        EmailClaim: "email"
+
+        # OpenID claim field containing the email verification
+        # flag. Normally "email_verified".  To accept every returned
+        # email address without checking a "verified" field at all,
+        # use the empty string "".
+        EmailVerifiedClaim: "email_verified"
+
+        # OpenID claim field containing the user's preferred
+        # username. If empty, use the mailbox part of the user's email
+        # address.
+        UsernameClaim: ""
+
       PAM:
         # (Experimental) Use PAM to authenticate users.
         Enable: false
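Because the OIDC library rejects any mismatch between the configured Issuer and the "issuer" field of the provider's discovery document, it can be worth checking the advertised value before enabling the provider. A sketch of that check (not part of this commit; the issuer URL is illustrative, and the standard OIDC discovery path is assumed):

<pre>
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
)

func main() {
	issuer := "https://accounts.example.com/"
	// Standard OIDC discovery endpoint, relative to the issuer URL.
	wellKnown := strings.TrimSuffix(issuer, "/") + "/.well-known/openid-configuration"
	resp, err := http.Get(wellKnown)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	var meta struct {
		Issuer string `json:"issuer"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil {
		panic(err)
	}
	fmt.Printf("configured %q / advertised %q / equal: %v\n", issuer, meta.Issuer, issuer == meta.Issuer)
}
</pre>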
index 26782c8ba61dca3e4b657b911b6361439522ebb6..0ad4222f551ba1220d2459f4330e9a1e05240d44 100644 (file)
@@ -151,6 +151,14 @@ var whitelist = map[string]bool{
        "Login.LDAP.URL":                               false,
        "Login.LDAP.UsernameAttribute":                 false,
        "Login.LoginCluster":                           true,
+       "Login.OpenIDConnect":                          true,
+       "Login.OpenIDConnect.ClientID":                 false,
+       "Login.OpenIDConnect.ClientSecret":             false,
+       "Login.OpenIDConnect.Enable":                   true,
+       "Login.OpenIDConnect.Issuer":                   false,
+       "Login.OpenIDConnect.EmailClaim":               false,
+       "Login.OpenIDConnect.EmailVerifiedClaim":       false,
+       "Login.OpenIDConnect.UsernameClaim":            false,
        "Login.PAM":                                    true,
        "Login.PAM.DefaultEmailDomain":                 false,
        "Login.PAM.Enable":                             true,
index 6a4ced7c6ea9776e0cb3f6fd92b052eb3637ddbd..758dc2677cf233b0d4d61462e7ec73d607f69174 100644 (file)
@@ -557,6 +557,40 @@ Clusters:
         # work. If false, only the primary email address will be used.
         AlternateEmailAddresses: true
 
+      OpenIDConnect:
+        # Authenticate with an OpenID Connect provider.
+        Enable: false
+
+        # Issuer URL, e.g., "https://login.example.com".
+        #
+        # This must be exactly equal to the URL returned by the issuer
+        # itself in its config response ("issuer" key). If the
+        # configured value is "https://example" and the provider
+        # returns "https://example:443" or "https://example/" then
+        # login will fail, even though those URLs are equivalent
+        # (RFC3986).
+        Issuer: ""
+
+        # Your client ID and client secret (supplied by the provider).
+        ClientID: ""
+        ClientSecret: ""
+
+        # OpenID claim field containing the user's email
+        # address. Normally "email"; see
+        # https://openid.net/specs/openid-connect-core-1_0.html#StandardClaims
+        EmailClaim: "email"
+
+        # OpenID claim field containing the email verification
+        # flag. Normally "email_verified".  To accept every returned
+        # email address without checking a "verified" field at all,
+        # use the empty string "".
+        EmailVerifiedClaim: "email_verified"
+
+        # OpenID claim field containing the user's preferred
+        # username. If empty, use the mailbox part of the user's email
+        # address.
+        UsernameClaim: ""
+
       PAM:
         # (Experimental) Use PAM to authenticate users.
         Enable: false
index 86a8f7df6d2cd4ccfdb68beec66f71dd7204f4cc..be6181bbe9bbc033cd9241cc14a8eca36ed23610 100644 (file)
@@ -64,14 +64,16 @@ func NewLoader(stdin io.Reader, logger logrus.FieldLogger) *Loader {
 //     // ldr.Path == "/tmp/c.yaml"
 func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
        flagset.StringVar(&ldr.Path, "config", arvados.DefaultConfigFile, "Site configuration `file` (default may be overridden by setting an ARVADOS_CONFIG environment variable)")
-       flagset.StringVar(&ldr.KeepstorePath, "legacy-keepstore-config", defaultKeepstoreConfigPath, "Legacy keepstore configuration `file`")
-       flagset.StringVar(&ldr.KeepWebPath, "legacy-keepweb-config", defaultKeepWebConfigPath, "Legacy keep-web configuration `file`")
-       flagset.StringVar(&ldr.CrunchDispatchSlurmPath, "legacy-crunch-dispatch-slurm-config", defaultCrunchDispatchSlurmConfigPath, "Legacy crunch-dispatch-slurm configuration `file`")
-       flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
-       flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
-       flagset.StringVar(&ldr.GitHttpdPath, "legacy-git-httpd-config", defaultGitHttpdConfigPath, "Legacy arv-git-httpd configuration `file`")
-       flagset.StringVar(&ldr.KeepBalancePath, "legacy-keepbalance-config", defaultKeepBalanceConfigPath, "Legacy keep-balance configuration `file`")
-       flagset.BoolVar(&ldr.SkipLegacy, "skip-legacy", false, "Don't load legacy config files")
+       if !ldr.SkipLegacy {
+               flagset.StringVar(&ldr.KeepstorePath, "legacy-keepstore-config", defaultKeepstoreConfigPath, "Legacy keepstore configuration `file`")
+               flagset.StringVar(&ldr.KeepWebPath, "legacy-keepweb-config", defaultKeepWebConfigPath, "Legacy keep-web configuration `file`")
+               flagset.StringVar(&ldr.CrunchDispatchSlurmPath, "legacy-crunch-dispatch-slurm-config", defaultCrunchDispatchSlurmConfigPath, "Legacy crunch-dispatch-slurm configuration `file`")
+               flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
+               flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
+               flagset.StringVar(&ldr.GitHttpdPath, "legacy-git-httpd-config", defaultGitHttpdConfigPath, "Legacy arv-git-httpd configuration `file`")
+               flagset.StringVar(&ldr.KeepBalancePath, "legacy-keepbalance-config", defaultKeepBalanceConfigPath, "Legacy keep-balance configuration `file`")
+               flagset.BoolVar(&ldr.SkipLegacy, "skip-legacy", false, "Don't load legacy config files")
+       }
 }
 
 // MungeLegacyConfigArgs checks args for a -config flag whose argument
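With this change, a caller that sets SkipLegacy before calling SetupFlags gets no -legacy-* or -skip-legacy flags registered at all; the new recover-collection command below relies on exactly that. A minimal sketch of the call sequence, using the loader API shown above:

<pre>
package main

import (
	"flag"
	"os"

	"git.arvados.org/arvados.git/lib/config"
	"git.arvados.org/arvados.git/sdk/go/ctxlog"
)

func main() {
	logger := ctxlog.New(os.Stderr, "text", "info")
	loader := config.NewLoader(os.Stdin, logger)
	loader.SkipLegacy = true // must be set before SetupFlags

	flags := flag.NewFlagSet("example", flag.ContinueOnError)
	loader.SetupFlags(flags) // registers -config only; no -legacy-* flags
	if err := flags.Parse(os.Args[1:]); err != nil {
		os.Exit(2)
	}
}
</pre>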
index 3c7ae3a2d9e5abf2773af742e0bcbc1b2213aea5..c7bce97130bfb0e4b327d3d2233a41d9c9c3b73d 100644 (file)
@@ -71,7 +71,10 @@ func (s *HandlerSuite) TestConfigExport(c *check.C) {
                req := httptest.NewRequest(method, "/arvados/v1/config", nil)
                resp := httptest.NewRecorder()
                s.handler.ServeHTTP(resp, req)
-               c.Check(resp.Code, check.Equals, http.StatusOK)
+               c.Log(resp.Body.String())
+               if !c.Check(resp.Code, check.Equals, http.StatusOK) {
+                       continue
+               }
                c.Check(resp.Header().Get("Access-Control-Allow-Origin"), check.Equals, `*`)
                c.Check(resp.Header().Get("Access-Control-Allow-Methods"), check.Matches, `.*\bGET\b.*`)
                c.Check(resp.Header().Get("Access-Control-Allow-Headers"), check.Matches, `.+`)
@@ -80,12 +83,11 @@ func (s *HandlerSuite) TestConfigExport(c *check.C) {
                        continue
                }
                var cluster arvados.Cluster
-               c.Log(resp.Body.String())
                err := json.Unmarshal(resp.Body.Bytes(), &cluster)
                c.Check(err, check.IsNil)
                c.Check(cluster.ManagementToken, check.Equals, "")
                c.Check(cluster.SystemRootToken, check.Equals, "")
-               c.Check(cluster.Collections.BlobSigning, check.DeepEquals, true)
+               c.Check(cluster.Collections.BlobSigning, check.Equals, true)
                c.Check(cluster.Collections.BlobSigningTTL, check.Equals, arvados.Duration(23*time.Second))
        }
 }
index 0fd0a9ad2348045c1abd85b3f47a54a5d25dc202..905cfed15c500d95689857e36c1c3323165c4d3d 100644 (file)
@@ -24,21 +24,42 @@ type loginController interface {
 
 func chooseLoginController(cluster *arvados.Cluster, railsProxy *railsProxy) loginController {
        wantGoogle := cluster.Login.Google.Enable
+       wantOpenIDConnect := cluster.Login.OpenIDConnect.Enable
        wantSSO := cluster.Login.SSO.Enable
        wantPAM := cluster.Login.PAM.Enable
        wantLDAP := cluster.Login.LDAP.Enable
        switch {
-       case wantGoogle && !wantSSO && !wantPAM && !wantLDAP:
-               return &googleLoginController{Cluster: cluster, RailsProxy: railsProxy}
-       case !wantGoogle && wantSSO && !wantPAM && !wantLDAP:
+       case wantGoogle && !wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
+               return &oidcLoginController{
+                       Cluster:            cluster,
+                       RailsProxy:         railsProxy,
+                       Issuer:             "https://accounts.google.com",
+                       ClientID:           cluster.Login.Google.ClientID,
+                       ClientSecret:       cluster.Login.Google.ClientSecret,
+                       UseGooglePeopleAPI: cluster.Login.Google.AlternateEmailAddresses,
+                       EmailClaim:         "email",
+                       EmailVerifiedClaim: "email_verified",
+               }
+       case !wantGoogle && wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
+               return &oidcLoginController{
+                       Cluster:            cluster,
+                       RailsProxy:         railsProxy,
+                       Issuer:             cluster.Login.OpenIDConnect.Issuer,
+                       ClientID:           cluster.Login.OpenIDConnect.ClientID,
+                       ClientSecret:       cluster.Login.OpenIDConnect.ClientSecret,
+                       EmailClaim:         cluster.Login.OpenIDConnect.EmailClaim,
+                       EmailVerifiedClaim: cluster.Login.OpenIDConnect.EmailVerifiedClaim,
+                       UsernameClaim:      cluster.Login.OpenIDConnect.UsernameClaim,
+               }
+       case !wantGoogle && !wantOpenIDConnect && wantSSO && !wantPAM && !wantLDAP:
                return &ssoLoginController{railsProxy}
-       case !wantGoogle && !wantSSO && wantPAM && !wantLDAP:
+       case !wantGoogle && !wantOpenIDConnect && !wantSSO && wantPAM && !wantLDAP:
                return &pamLoginController{Cluster: cluster, RailsProxy: railsProxy}
-       case !wantGoogle && !wantSSO && !wantPAM && wantLDAP:
+       case !wantGoogle && !wantOpenIDConnect && !wantSSO && !wantPAM && wantLDAP:
                return &ldapLoginController{Cluster: cluster, RailsProxy: railsProxy}
        default:
                return errorLoginController{
-                       error: errors.New("configuration problem: exactly one of Login.Google, Login.SSO, Login.PAM, and Login.LDAP must be enabled"),
+                       error: errors.New("configuration problem: exactly one of Login.Google, Login.OpenIDConnect, Login.SSO, Login.PAM, and Login.LDAP must be enabled"),
                }
        }
 }
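chooseLoginController requires that exactly one login backend be enabled, and each new provider adds a boolean term to every case. As a clarifying sketch (not the code in this commit), the same invariant can be expressed by counting enabled backends:

<pre>
package main

import "fmt"

// exactlyOne reports whether exactly one of the given flags is set,
// the invariant chooseLoginController enforces with its switch.
func exactlyOne(flags ...bool) bool {
	n := 0
	for _, f := range flags {
		if f {
			n++
		}
	}
	return n == 1
}

func main() {
	wantGoogle, wantOpenIDConnect, wantSSO, wantPAM, wantLDAP := false, true, false, false, false
	fmt.Println(exactlyOne(wantGoogle, wantOpenIDConnect, wantSSO, wantPAM, wantLDAP)) // true
}
</pre>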
similarity index 66%
rename from lib/controller/localdb/login_google.go
rename to lib/controller/localdb/login_oidc.go
index 144b04c46d7ee6ab865eff24a7acc06db08f5dd9..9274d75d7c9fdc1973cbcad621b306599e571893 100644 (file)
@@ -30,70 +30,75 @@ import (
        "google.golang.org/api/people/v1"
 )
 
-type googleLoginController struct {
-       Cluster    *arvados.Cluster
-       RailsProxy *railsProxy
+type oidcLoginController struct {
+       Cluster            *arvados.Cluster
+       RailsProxy         *railsProxy
+       Issuer             string // OIDC issuer URL, e.g., "https://accounts.google.com"
+       ClientID           string
+       ClientSecret       string
+       UseGooglePeopleAPI bool   // Use Google People API to look up alternate email addresses
+       EmailClaim         string // OpenID claim to use as email address; typically "email"
+       EmailVerifiedClaim string // If non-empty, ensure claim value is true before accepting EmailClaim; typically "email_verified"
+       UsernameClaim      string // If non-empty, use as preferred username
 
-       issuer            string // override OIDC issuer URL (normally https://accounts.google.com) for testing
-       peopleAPIBasePath string // override Google People API base URL (normally set by google pkg to https://people.googleapis.com/)
-       provider          *oidc.Provider
-       mu                sync.Mutex
+       // override Google People API base URL for testing purposes
+       // (normally empty, set by google pkg to
+       // https://people.googleapis.com/)
+       peopleAPIBasePath string
+
+       provider   *oidc.Provider        // initialized by setup()
+       oauth2conf *oauth2.Config        // initialized by setup()
+       verifier   *oidc.IDTokenVerifier // initialized by setup()
+       mu         sync.Mutex            // protects setup()
 }
 
-func (ctrl *googleLoginController) getProvider() (*oidc.Provider, error) {
+// Initialize ctrl.provider and ctrl.oauth2conf.
+func (ctrl *oidcLoginController) setup() error {
        ctrl.mu.Lock()
        defer ctrl.mu.Unlock()
-       if ctrl.provider == nil {
-               issuer := ctrl.issuer
-               if issuer == "" {
-                       issuer = "https://accounts.google.com"
-               }
-               provider, err := oidc.NewProvider(context.Background(), issuer)
-               if err != nil {
-                       return nil, err
-               }
-               ctrl.provider = provider
+       if ctrl.provider != nil {
+               // already set up
+               return nil
        }
-       return ctrl.provider, nil
-}
-
-func (ctrl *googleLoginController) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) {
-       return noopLogout(ctrl.Cluster, opts)
-}
-
-func (ctrl *googleLoginController) Login(ctx context.Context, opts arvados.LoginOptions) (arvados.LoginResponse, error) {
-       provider, err := ctrl.getProvider()
+       redirURL, err := (*url.URL)(&ctrl.Cluster.Services.Controller.ExternalURL).Parse("/" + arvados.EndpointLogin.Path)
        if err != nil {
-               return loginError(fmt.Errorf("error setting up OpenID Connect provider: %s", err))
+               return fmt.Errorf("error making redirect URL: %s", err)
        }
-       redirURL, err := (*url.URL)(&ctrl.Cluster.Services.Controller.ExternalURL).Parse("/login")
+       provider, err := oidc.NewProvider(context.Background(), ctrl.Issuer)
        if err != nil {
-               return loginError(fmt.Errorf("error making redirect URL: %s", err))
+               return err
        }
-       conf := &oauth2.Config{
-               ClientID:     ctrl.Cluster.Login.Google.ClientID,
-               ClientSecret: ctrl.Cluster.Login.Google.ClientSecret,
+       ctrl.oauth2conf = &oauth2.Config{
+               ClientID:     ctrl.ClientID,
+               ClientSecret: ctrl.ClientSecret,
                Endpoint:     provider.Endpoint(),
                Scopes:       []string{oidc.ScopeOpenID, "profile", "email"},
                RedirectURL:  redirURL.String(),
        }
-       verifier := provider.Verifier(&oidc.Config{
-               ClientID: conf.ClientID,
+       ctrl.verifier = provider.Verifier(&oidc.Config{
+               ClientID: ctrl.ClientID,
        })
+       ctrl.provider = provider
+       return nil
+}
+
+func (ctrl *oidcLoginController) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) {
+       return noopLogout(ctrl.Cluster, opts)
+}
+
+func (ctrl *oidcLoginController) Login(ctx context.Context, opts arvados.LoginOptions) (arvados.LoginResponse, error) {
+       err := ctrl.setup()
+       if err != nil {
+               return loginError(fmt.Errorf("error setting up OpenID Connect provider: %s", err))
+       }
        if opts.State == "" {
-               // Initiate Google sign-in.
+               // Initiate OIDC sign-in.
                if opts.ReturnTo == "" {
                        return loginError(errors.New("missing return_to parameter"))
                }
-               me := url.URL(ctrl.Cluster.Services.Controller.ExternalURL)
-               callback, err := me.Parse("/" + arvados.EndpointLogin.Path)
-               if err != nil {
-                       return loginError(err)
-               }
-               conf.RedirectURL = callback.String()
                state := ctrl.newOAuth2State([]byte(ctrl.Cluster.SystemRootToken), opts.Remote, opts.ReturnTo)
                return arvados.LoginResponse{
-                       RedirectLocation: conf.AuthCodeURL(state.String(),
+                       RedirectLocation: ctrl.oauth2conf.AuthCodeURL(state.String(),
                                // prompt=select_account tells Google
                                // to show the "choose which Google
                                // account" page, even if the client
@@ -102,12 +107,12 @@ func (ctrl *googleLoginController) Login(ctx context.Context, opts arvados.Login
                                oauth2.SetAuthURLParam("prompt", "select_account")),
                }, nil
        } else {
-               // Callback after Google sign-in.
+               // Callback after OIDC sign-in.
                state := ctrl.parseOAuth2State(opts.State)
                if !state.verify([]byte(ctrl.Cluster.SystemRootToken)) {
                        return loginError(errors.New("invalid OAuth2 state"))
                }
-               oauth2Token, err := conf.Exchange(ctx, opts.Code)
+               oauth2Token, err := ctrl.oauth2conf.Exchange(ctx, opts.Code)
                if err != nil {
                        return loginError(fmt.Errorf("error in OAuth2 exchange: %s", err))
                }
@@ -115,11 +120,11 @@ func (ctrl *googleLoginController) Login(ctx context.Context, opts arvados.Login
                if !ok {
                        return loginError(errors.New("error in OAuth2 exchange: no ID token in OAuth2 token"))
                }
-               idToken, err := verifier.Verify(ctx, rawIDToken)
+               idToken, err := ctrl.verifier.Verify(ctx, rawIDToken)
                if err != nil {
                        return loginError(fmt.Errorf("error verifying ID token: %s", err))
                }
-               authinfo, err := ctrl.getAuthInfo(ctx, ctrl.Cluster, conf, oauth2Token, idToken)
+               authinfo, err := ctrl.getAuthInfo(ctx, oauth2Token, idToken)
                if err != nil {
                        return loginError(err)
                }
@@ -131,7 +136,7 @@ func (ctrl *googleLoginController) Login(ctx context.Context, opts arvados.Login
        }
 }
 
-func (ctrl *googleLoginController) UserAuthenticate(ctx context.Context, opts arvados.UserAuthenticateOptions) (arvados.APIClientAuthorization, error) {
+func (ctrl *oidcLoginController) UserAuthenticate(ctx context.Context, opts arvados.UserAuthenticateOptions) (arvados.APIClientAuthorization, error) {
        return arvados.APIClientAuthorization{}, httpserver.ErrorWithStatus(errors.New("username/password authentication is not available"), http.StatusBadRequest)
 }
 
@@ -139,37 +144,38 @@ func (ctrl *googleLoginController) UserAuthenticate(ctx context.Context, opts ar
 // primary address at index 0. The provided defaultAddr is always
 // included in the returned slice, and is used as the primary if the
 // Google API does not indicate one.
-func (ctrl *googleLoginController) getAuthInfo(ctx context.Context, cluster *arvados.Cluster, conf *oauth2.Config, token *oauth2.Token, idToken *oidc.IDToken) (*rpc.UserSessionAuthInfo, error) {
+func (ctrl *oidcLoginController) getAuthInfo(ctx context.Context, token *oauth2.Token, idToken *oidc.IDToken) (*rpc.UserSessionAuthInfo, error) {
        var ret rpc.UserSessionAuthInfo
        defer ctxlog.FromContext(ctx).WithField("ret", &ret).Debug("getAuthInfo returned")
 
-       var claims struct {
-               Name     string `json:"name"`
-               Email    string `json:"email"`
-               Verified bool   `json:"email_verified"`
-       }
+       var claims map[string]interface{}
        if err := idToken.Claims(&claims); err != nil {
                return nil, fmt.Errorf("error extracting claims from ID token: %s", err)
-       } else if claims.Verified {
+       } else if verified, _ := claims[ctrl.EmailVerifiedClaim].(bool); verified || ctrl.EmailVerifiedClaim == "" {
                // Fall back to this info if the People API call
                // (below) doesn't return a primary && verified email.
-               if names := strings.Fields(strings.TrimSpace(claims.Name)); len(names) > 1 {
+               name, _ := claims["name"].(string)
+               if names := strings.Fields(strings.TrimSpace(name)); len(names) > 1 {
                        ret.FirstName = strings.Join(names[0:len(names)-1], " ")
                        ret.LastName = names[len(names)-1]
                } else {
                        ret.FirstName = names[0]
                }
-               ret.Email = claims.Email
+               ret.Email, _ = claims[ctrl.EmailClaim].(string)
+       }
+
+       if ctrl.UsernameClaim != "" {
+               ret.Username, _ = claims[ctrl.UsernameClaim].(string)
        }
 
-       if !ctrl.Cluster.Login.Google.AlternateEmailAddresses {
+       if !ctrl.UseGooglePeopleAPI {
                if ret.Email == "" {
-                       return nil, fmt.Errorf("cannot log in with unverified email address %q", claims.Email)
+                       return nil, fmt.Errorf("cannot log in with unverified email address %q", claims[ctrl.EmailClaim])
                }
                return &ret, nil
        }
 
-       svc, err := people.NewService(ctx, option.WithTokenSource(conf.TokenSource(ctx, token)), option.WithScopes(people.UserEmailsReadScope))
+       svc, err := people.NewService(ctx, option.WithTokenSource(ctrl.oauth2conf.TokenSource(ctx, token)), option.WithScopes(people.UserEmailsReadScope))
        if err != nil {
                return nil, fmt.Errorf("error setting up People API: %s", err)
        }
@@ -218,9 +224,13 @@ func (ctrl *googleLoginController) getAuthInfo(ctx context.Context, cluster *arv
                return nil, errors.New("cannot log in without a verified email address")
        }
        for ae := range altEmails {
-               if ae != ret.Email {
-                       ret.AlternateEmails = append(ret.AlternateEmails, ae)
-                       if i := strings.Index(ae, "@"); i > 0 && strings.ToLower(ae[i+1:]) == strings.ToLower(ctrl.Cluster.Users.PreferDomainForUsername) {
+               if ae == ret.Email {
+                       continue
+               }
+               ret.AlternateEmails = append(ret.AlternateEmails, ae)
+               if ret.Username == "" {
+                       i := strings.Index(ae, "@")
+                       if i > 0 && strings.ToLower(ae[i+1:]) == strings.ToLower(ctrl.Cluster.Users.PreferDomainForUsername) {
                                ret.Username = strings.SplitN(ae[:i], "+", 2)[0]
                        }
                }
@@ -237,7 +247,7 @@ func loginError(sendError error) (resp arvados.LoginResponse, err error) {
        return
 }
 
-func (ctrl *googleLoginController) newOAuth2State(key []byte, remote, returnTo string) oauth2State {
+func (ctrl *oidcLoginController) newOAuth2State(key []byte, remote, returnTo string) oauth2State {
        s := oauth2State{
                Time:     time.Now().Unix(),
                Remote:   remote,
@@ -254,7 +264,7 @@ type oauth2State struct {
        ReturnTo string // redirect target
 }
 
-func (ctrl *googleLoginController) parseOAuth2State(encoded string) (s oauth2State) {
+func (ctrl *oidcLoginController) parseOAuth2State(encoded string) (s oauth2State) {
        // Errors are not checked. If decoding/parsing fails, the
        // token will be rejected by verify().
        decoded, _ := base64.RawURLEncoding.DecodeString(encoded)
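The key change in getAuthInfo is decoding the ID token's claims into a generic map instead of a fixed struct, so the claim names (EmailClaim, EmailVerifiedClaim, UsernameClaim) can come from configuration. A self-contained sketch of that technique, using the alt_* claim names from the test file below (json.Unmarshal stands in for idToken.Claims):

<pre>
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// A raw ID-token payload, as idToken.Claims would decode it.
	raw := []byte(`{"alt_email":"user@example.com","alt_verified":true}`)
	var claims map[string]interface{}
	if err := json.Unmarshal(raw, &claims); err != nil {
		panic(err)
	}

	emailClaim, verifiedClaim := "alt_email", "alt_verified" // from config
	verified, _ := claims[verifiedClaim].(bool)
	if verified || verifiedClaim == "" {
		email, _ := claims[emailClaim].(string)
		fmt.Println("accepted email:", email)
	}
}
</pre>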
similarity index 65%
rename from lib/controller/localdb/login_google_test.go
rename to lib/controller/localdb/login_oidc_test.go
index 495fbb69b31e5c659b4476c772303658e46592c1..1345e86900dd1056da5a9259bf0d5caf179253e5 100644 (file)
@@ -9,6 +9,7 @@ import (
        "context"
        "crypto/rand"
        "crypto/rsa"
+       "encoding/base64"
        "encoding/json"
        "fmt"
        "net/http"
@@ -34,9 +35,9 @@ func Test(t *testing.T) {
        check.TestingT(t)
 }
 
-var _ = check.Suite(&LoginSuite{})
+var _ = check.Suite(&OIDCLoginSuite{})
 
-type LoginSuite struct {
+type OIDCLoginSuite struct {
        cluster               *arvados.Cluster
        ctx                   context.Context
        localdb               *Conn
@@ -47,21 +48,23 @@ type LoginSuite struct {
        issuerKey             *rsa.PrivateKey
 
        // expected token request
-       validCode string
+       validCode         string
+       validClientID     string
+       validClientSecret string
        // desired response from token endpoint
        authEmail         string
        authEmailVerified bool
        authName          string
 }
 
-func (s *LoginSuite) TearDownSuite(c *check.C) {
+func (s *OIDCLoginSuite) TearDownSuite(c *check.C) {
        // Undo any changes/additions to the user database so they
        // don't affect subsequent tests.
        arvadostest.ResetEnv()
        c.Check(arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil), check.IsNil)
 }
 
-func (s *LoginSuite) SetUpTest(c *check.C) {
+func (s *OIDCLoginSuite) SetUpTest(c *check.C) {
        var err error
        s.issuerKey, err = rsa.GenerateKey(rand.Reader, 2048)
        c.Assert(err, check.IsNil)
@@ -83,20 +86,36 @@ func (s *LoginSuite) SetUpTest(c *check.C) {
                                "userinfo_endpoint":      s.fakeIssuer.URL + "/userinfo",
                        })
                case "/token":
+                       var clientID, clientSecret string
+                       auth, _ := base64.StdEncoding.DecodeString(strings.TrimPrefix(req.Header.Get("Authorization"), "Basic "))
+                       authsplit := strings.Split(string(auth), ":")
+                       if len(authsplit) == 2 {
+                               clientID, _ = url.QueryUnescape(authsplit[0])
+                               clientSecret, _ = url.QueryUnescape(authsplit[1])
+                       }
+                       if clientID != s.validClientID || clientSecret != s.validClientSecret {
+                               c.Logf("fakeIssuer: expected (%q, %q) got (%q, %q)", s.validClientID, s.validClientSecret, clientID, clientSecret)
+                               w.WriteHeader(http.StatusUnauthorized)
+                               return
+                       }
+
                        if req.Form.Get("code") != s.validCode || s.validCode == "" {
                                w.WriteHeader(http.StatusUnauthorized)
                                return
                        }
                        idToken, _ := json.Marshal(map[string]interface{}{
                                "iss":            s.fakeIssuer.URL,
-                               "aud":            []string{"test%client$id"},
+                               "aud":            []string{clientID},
                                "sub":            "fake-user-id",
-                               "exp":            time.Now().UTC().Add(time.Minute).UnixNano(),
-                               "iat":            time.Now().UTC().UnixNano(),
+                               "exp":            time.Now().UTC().Add(time.Minute).Unix(),
+                               "iat":            time.Now().UTC().Unix(),
                                "nonce":          "fake-nonce",
                                "email":          s.authEmail,
                                "email_verified": s.authEmailVerified,
                                "name":           s.authName,
+                               "alt_verified":   true,                    // for custom claim tests
+                               "alt_email":      "alt_email@example.com", // for custom claim tests
+                               "alt_username":   "desired-username",      // for custom claim tests
                        })
                        json.NewEncoder(w).Encode(struct {
                                AccessToken  string `json:"access_token"`
@@ -145,40 +164,44 @@ func (s *LoginSuite) SetUpTest(c *check.C) {
        s.fakePeopleAPIResponse = map[string]interface{}{}
 
        cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+       c.Assert(err, check.IsNil)
        s.cluster, err = cfg.GetCluster("")
+       c.Assert(err, check.IsNil)
        s.cluster.Login.SSO.Enable = false
        s.cluster.Login.Google.Enable = true
        s.cluster.Login.Google.ClientID = "test%client$id"
        s.cluster.Login.Google.ClientSecret = "test#client/secret"
        s.cluster.Users.PreferDomainForUsername = "PreferDomainForUsername.example.com"
-       c.Assert(err, check.IsNil)
+       s.validClientID = "test%client$id"
+       s.validClientSecret = "test#client/secret"
 
        s.localdb = NewConn(s.cluster)
-       s.localdb.loginController.(*googleLoginController).issuer = s.fakeIssuer.URL
-       s.localdb.loginController.(*googleLoginController).peopleAPIBasePath = s.fakePeopleAPI.URL
+       c.Assert(s.localdb.loginController, check.FitsTypeOf, (*oidcLoginController)(nil))
+       s.localdb.loginController.(*oidcLoginController).Issuer = s.fakeIssuer.URL
+       s.localdb.loginController.(*oidcLoginController).peopleAPIBasePath = s.fakePeopleAPI.URL
 
        s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
        *s.localdb.railsProxy = *rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider)
 }
 
-func (s *LoginSuite) TearDownTest(c *check.C) {
+func (s *OIDCLoginSuite) TearDownTest(c *check.C) {
        s.railsSpy.Close()
 }
 
-func (s *LoginSuite) TestGoogleLogout(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogout(c *check.C) {
        resp, err := s.localdb.Logout(context.Background(), arvados.LogoutOptions{ReturnTo: "https://foo.example.com/bar"})
        c.Check(err, check.IsNil)
        c.Check(resp.RedirectLocation, check.Equals, "https://foo.example.com/bar")
 }
 
-func (s *LoginSuite) TestGoogleLogin_Start_Bogus(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_Start_Bogus(c *check.C) {
        resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{})
        c.Check(err, check.IsNil)
        c.Check(resp.RedirectLocation, check.Equals, "")
        c.Check(resp.HTML.String(), check.Matches, `.*missing return_to parameter.*`)
 }
 
-func (s *LoginSuite) TestGoogleLogin_Start(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_Start(c *check.C) {
        for _, remote := range []string{"", "zzzzz"} {
                resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{Remote: remote, ReturnTo: "https://app.example.com/foo?bar"})
                c.Check(err, check.IsNil)
@@ -188,7 +211,7 @@ func (s *LoginSuite) TestGoogleLogin_Start(c *check.C) {
                c.Check(target.Host, check.Equals, issuerURL.Host)
                q := target.Query()
                c.Check(q.Get("client_id"), check.Equals, "test%client$id")
-               state := s.localdb.loginController.(*googleLoginController).parseOAuth2State(q.Get("state"))
+               state := s.localdb.loginController.(*oidcLoginController).parseOAuth2State(q.Get("state"))
                c.Check(state.verify([]byte(s.cluster.SystemRootToken)), check.Equals, true)
                c.Check(state.Time, check.Not(check.Equals), 0)
                c.Check(state.Remote, check.Equals, remote)
@@ -196,7 +219,7 @@ func (s *LoginSuite) TestGoogleLogin_Start(c *check.C) {
        }
 }
 
-func (s *LoginSuite) TestGoogleLogin_InvalidCode(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_InvalidCode(c *check.C) {
        state := s.startLogin(c)
        resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
                Code:  "first-try-a-bogus-code",
@@ -207,7 +230,7 @@ func (s *LoginSuite) TestGoogleLogin_InvalidCode(c *check.C) {
        c.Check(resp.HTML.String(), check.Matches, `(?ms).*error in OAuth2 exchange.*cannot fetch token.*`)
 }
 
-func (s *LoginSuite) TestGoogleLogin_InvalidState(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_InvalidState(c *check.C) {
        s.startLogin(c)
        resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
                Code:  s.validCode,
@@ -218,16 +241,16 @@ func (s *LoginSuite) TestGoogleLogin_InvalidState(c *check.C) {
        c.Check(resp.HTML.String(), check.Matches, `(?ms).*invalid OAuth2 state.*`)
 }
 
-func (s *LoginSuite) setupPeopleAPIError(c *check.C) {
+func (s *OIDCLoginSuite) setupPeopleAPIError(c *check.C) {
        s.fakePeopleAPI = httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) {
                w.WriteHeader(http.StatusForbidden)
                fmt.Fprintln(w, `Error 403: accessNotConfigured`)
        }))
-       s.localdb.loginController.(*googleLoginController).peopleAPIBasePath = s.fakePeopleAPI.URL
+       s.localdb.loginController.(*oidcLoginController).peopleAPIBasePath = s.fakePeopleAPI.URL
 }
 
-func (s *LoginSuite) TestGoogleLogin_PeopleAPIDisabled(c *check.C) {
-       s.cluster.Login.Google.AlternateEmailAddresses = false
+func (s *OIDCLoginSuite) TestGoogleLogin_PeopleAPIDisabled(c *check.C) {
+       s.localdb.loginController.(*oidcLoginController).UseGooglePeopleAPI = false
        s.authEmail = "joe.smith@primary.example.com"
        s.setupPeopleAPIError(c)
        state := s.startLogin(c)
@@ -240,7 +263,35 @@ func (s *LoginSuite) TestGoogleLogin_PeopleAPIDisabled(c *check.C) {
        c.Check(authinfo.Email, check.Equals, "joe.smith@primary.example.com")
 }
 
-func (s *LoginSuite) TestGoogleLogin_PeopleAPIError(c *check.C) {
+func (s *OIDCLoginSuite) TestConfig(c *check.C) {
+       s.cluster.Login.Google.Enable = false
+       s.cluster.Login.OpenIDConnect.Enable = true
+       s.cluster.Login.OpenIDConnect.Issuer = "https://accounts.example.com/"
+       s.cluster.Login.OpenIDConnect.ClientID = "oidc-client-id"
+       s.cluster.Login.OpenIDConnect.ClientSecret = "oidc-client-secret"
+       localdb := NewConn(s.cluster)
+       ctrl := localdb.loginController.(*oidcLoginController)
+       c.Check(ctrl.Issuer, check.Equals, "https://accounts.example.com/")
+       c.Check(ctrl.ClientID, check.Equals, "oidc-client-id")
+       c.Check(ctrl.ClientSecret, check.Equals, "oidc-client-secret")
+       c.Check(ctrl.UseGooglePeopleAPI, check.Equals, false)
+
+       for _, enableAltEmails := range []bool{false, true} {
+               s.cluster.Login.OpenIDConnect.Enable = false
+               s.cluster.Login.Google.Enable = true
+               s.cluster.Login.Google.ClientID = "google-client-id"
+               s.cluster.Login.Google.ClientSecret = "google-client-secret"
+               s.cluster.Login.Google.AlternateEmailAddresses = enableAltEmails
+               localdb = NewConn(s.cluster)
+               ctrl = localdb.loginController.(*oidcLoginController)
+               c.Check(ctrl.Issuer, check.Equals, "https://accounts.google.com")
+               c.Check(ctrl.ClientID, check.Equals, "google-client-id")
+               c.Check(ctrl.ClientSecret, check.Equals, "google-client-secret")
+               c.Check(ctrl.UseGooglePeopleAPI, check.Equals, enableAltEmails)
+       }
+}
+
+func (s *OIDCLoginSuite) TestGoogleLogin_PeopleAPIError(c *check.C) {
        s.setupPeopleAPIError(c)
        state := s.startLogin(c)
        resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
@@ -251,7 +302,102 @@ func (s *LoginSuite) TestGoogleLogin_PeopleAPIError(c *check.C) {
        c.Check(resp.RedirectLocation, check.Equals, "")
 }
 
-func (s *LoginSuite) TestGoogleLogin_Success(c *check.C) {
+func (s *OIDCLoginSuite) TestGenericOIDCLogin(c *check.C) {
+       s.cluster.Login.Google.Enable = false
+       s.cluster.Login.OpenIDConnect.Enable = true
+       json.Unmarshal([]byte(fmt.Sprintf("%q", s.fakeIssuer.URL)), &s.cluster.Login.OpenIDConnect.Issuer)
+       s.cluster.Login.OpenIDConnect.ClientID = "oidc#client#id"
+       s.cluster.Login.OpenIDConnect.ClientSecret = "oidc#client#secret"
+       s.validClientID = "oidc#client#id"
+       s.validClientSecret = "oidc#client#secret"
+       for _, trial := range []struct {
+               expectEmail string // "" if failure expected
+               setup       func()
+       }{
+               {
+                       expectEmail: "user@oidc.example.com",
+                       setup: func() {
+                               c.Log("=== succeed because email_verified is false but not required")
+                               s.authEmail = "user@oidc.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = ""
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = ""
+                       },
+               },
+               {
+                       expectEmail: "",
+                       setup: func() {
+                               c.Log("=== fail because email_verified is false and required")
+                               s.authEmail = "user@oidc.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = "email_verified"
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = ""
+                       },
+               },
+               {
+                       expectEmail: "user@oidc.example.com",
+                       setup: func() {
+                               c.Log("=== succeed because email_verified is false but config uses custom 'verified' claim")
+                               s.authEmail = "user@oidc.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = "alt_verified"
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = ""
+                       },
+               },
+               {
+                       expectEmail: "alt_email@example.com",
+                       setup: func() {
+                               c.Log("=== succeed with custom 'email' and 'email_verified' claims")
+                               s.authEmail = "bad@wrong.example.com"
+                               s.authEmailVerified = false
+                               s.cluster.Login.OpenIDConnect.EmailClaim = "alt_email"
+                               s.cluster.Login.OpenIDConnect.EmailVerifiedClaim = "alt_verified"
+                               s.cluster.Login.OpenIDConnect.UsernameClaim = "alt_username"
+                       },
+               },
+       } {
+               trial.setup()
+               if s.railsSpy != nil {
+                       s.railsSpy.Close()
+               }
+               s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+               s.localdb = NewConn(s.cluster)
+               *s.localdb.railsProxy = *rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider)
+
+               state := s.startLogin(c)
+               resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
+                       Code:  s.validCode,
+                       State: state,
+               })
+               c.Assert(err, check.IsNil)
+               if trial.expectEmail == "" {
+                       c.Check(resp.HTML.String(), check.Matches, `(?ms).*Login error.*`)
+                       c.Check(resp.RedirectLocation, check.Equals, "")
+                       continue
+               }
+               c.Check(resp.HTML.String(), check.Equals, "")
+               target, err := url.Parse(resp.RedirectLocation)
+               c.Assert(err, check.IsNil)
+               token := target.Query().Get("api_token")
+               c.Check(token, check.Matches, `v2/zzzzz-gj3su-.{15}/.{32,50}`)
+               authinfo := getCallbackAuthInfo(c, s.railsSpy)
+               c.Check(authinfo.Email, check.Equals, trial.expectEmail)
+
+               switch s.cluster.Login.OpenIDConnect.UsernameClaim {
+               case "alt_username":
+                       c.Check(authinfo.Username, check.Equals, "desired-username")
+               case "":
+                       c.Check(authinfo.Username, check.Equals, "")
+               default:
+                       c.Fail() // bad test case
+               }
+       }
+}
+
+func (s *OIDCLoginSuite) TestGoogleLogin_Success(c *check.C) {
        state := s.startLogin(c)
        resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{
                Code:  s.validCode,
@@ -290,7 +436,7 @@ func (s *LoginSuite) TestGoogleLogin_Success(c *check.C) {
        c.Check(err, check.ErrorMatches, `.*401 Unauthorized: Not logged in.*`)
 }
 
-func (s *LoginSuite) TestGoogleLogin_RealName(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_RealName(c *check.C) {
        s.authEmail = "joe.smith@primary.example.com"
        s.fakePeopleAPIResponse = map[string]interface{}{
                "names": []map[string]interface{}{
@@ -317,7 +463,7 @@ func (s *LoginSuite) TestGoogleLogin_RealName(c *check.C) {
        c.Check(authinfo.LastName, check.Equals, "Psmith")
 }
 
-func (s *LoginSuite) TestGoogleLogin_OIDCRealName(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_OIDCRealName(c *check.C) {
        s.authName = "Joe P. Smith"
        s.authEmail = "joe.smith@primary.example.com"
        state := s.startLogin(c)
@@ -332,7 +478,7 @@ func (s *LoginSuite) TestGoogleLogin_OIDCRealName(c *check.C) {
 }
 
 // People API returns some additional email addresses.
-func (s *LoginSuite) TestGoogleLogin_AlternateEmailAddresses(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_AlternateEmailAddresses(c *check.C) {
        s.authEmail = "joe.smith@primary.example.com"
        s.fakePeopleAPIResponse = map[string]interface{}{
                "emailAddresses": []map[string]interface{}{
@@ -361,7 +507,7 @@ func (s *LoginSuite) TestGoogleLogin_AlternateEmailAddresses(c *check.C) {
 }
 
 // Primary address is not the one initially returned by oidc.
-func (s *LoginSuite) TestGoogleLogin_AlternateEmailAddresses_Primary(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_AlternateEmailAddresses_Primary(c *check.C) {
        s.authEmail = "joe.smith@alternate.example.com"
        s.fakePeopleAPIResponse = map[string]interface{}{
                "emailAddresses": []map[string]interface{}{
@@ -390,7 +536,7 @@ func (s *LoginSuite) TestGoogleLogin_AlternateEmailAddresses_Primary(c *check.C)
        c.Check(authinfo.Username, check.Equals, "jsmith")
 }
 
-func (s *LoginSuite) TestGoogleLogin_NoPrimaryEmailAddress(c *check.C) {
+func (s *OIDCLoginSuite) TestGoogleLogin_NoPrimaryEmailAddress(c *check.C) {
        s.authEmail = "joe.smith@unverified.example.com"
        s.authEmailVerified = false
        s.fakePeopleAPIResponse = map[string]interface{}{
@@ -417,7 +563,7 @@ func (s *LoginSuite) TestGoogleLogin_NoPrimaryEmailAddress(c *check.C) {
        c.Check(authinfo.Username, check.Equals, "")
 }
 
-func (s *LoginSuite) startLogin(c *check.C) (state string) {
+func (s *OIDCLoginSuite) startLogin(c *check.C) (state string) {
        // Initiate login, but instead of following the redirect to
        // the provider, just grab state from the redirect URL.
        resp, err := s.localdb.Login(context.Background(), arvados.LoginOptions{ReturnTo: "https://app.example.com/foo?bar"})
@@ -429,7 +575,7 @@ func (s *LoginSuite) startLogin(c *check.C) (state string) {
        return
 }
 
-func (s *LoginSuite) fakeToken(c *check.C, payload []byte) string {
+func (s *OIDCLoginSuite) fakeToken(c *check.C, payload []byte) string {
        signer, err := jose.NewSigner(jose.SigningKey{Algorithm: jose.RS256, Key: s.issuerKey}, nil)
        if err != nil {
                c.Error(err)
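The fake issuer's /token handler above validates client credentials from the Authorization header: OAuth2 clients send the client ID and secret form-urlencoded inside HTTP Basic auth (RFC 6749 section 2.3.1), which is why both halves are unescaped after base64 decoding. A standalone sketch of that parsing:

<pre>
package main

import (
	"encoding/base64"
	"fmt"
	"net/url"
	"strings"
)

// parseClientAuth mirrors the fake issuer's /token handler: decode
// Basic auth, split on ":", and form-urldecode both halves.
func parseClientAuth(header string) (id, secret string, ok bool) {
	raw, err := base64.StdEncoding.DecodeString(strings.TrimPrefix(header, "Basic "))
	if err != nil {
		return "", "", false
	}
	parts := strings.SplitN(string(raw), ":", 2)
	if len(parts) != 2 {
		return "", "", false
	}
	var err1, err2 error
	id, err1 = url.QueryUnescape(parts[0])
	secret, err2 = url.QueryUnescape(parts[1])
	return id, secret, err1 == nil && err2 == nil
}

func main() {
	h := "Basic " + base64.StdEncoding.EncodeToString([]byte("oidc%23client%23id:oidc%23client%23secret"))
	fmt.Println(parseClientAuth(h)) // oidc#client#id oidc#client#secret true
}
</pre>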
diff --git a/lib/recovercollection/cmd.go b/lib/recovercollection/cmd.go
new file mode 100644 (file)
index 0000000..cea4607
--- /dev/null
@@ -0,0 +1,381 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package recovercollection
+
+import (
+       "context"
+       "errors"
+       "flag"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "strings"
+       "sync"
+       "time"
+
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/sirupsen/logrus"
+)
+
+var Command command
+
+type command struct{}
+
+func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       var err error
+       logger := ctxlog.New(stderr, "text", "info")
+       defer func() {
+               if err != nil {
+                       logger.WithError(err).Error("fatal")
+               }
+               logger.Info("exiting")
+       }()
+
+       loader := config.NewLoader(stdin, logger)
+       loader.SkipLegacy = true
+
+       flags := flag.NewFlagSet("", flag.ContinueOnError)
+       flags.SetOutput(stderr)
+       flags.Usage = func() {
+               fmt.Fprintf(flags.Output(), `Usage:
+       %s [options ...] { /path/to/manifest.txt | log-or-collection-uuid } [...]
+
+       This program recovers deleted collections. Recovery is
+       possible when the collection's manifest is still available and
+       all of its data blocks are still available or recoverable
+       (e.g., garbage collection is not enabled, the blocks are too
+       new for garbage collection, the blocks are referenced by other
+       collections, or the blocks have been trashed but not yet
+       deleted).
+
+       There are multiple ways to specify a collection to recover:
+
+       * Path to a local file containing a manifest with the desired
+         data
+
+       * UUID of an Arvados log entry, typically a "delete" or
+         "update" event, whose "old attributes" have a manifest with
+         the desired data
+
+       * UUID of an Arvados collection whose most recent log entry,
+         typically a "delete" or "update" event, has the desired
+         data in its "old attributes"
+
+       For each provided collection manifest, once all data blocks
+       are recovered/protected from garbage collection, a new
+       collection is saved and its UUID is printed on stdout.
+
+       Restored collections will belong to the system (root) user.
+
+       Exit status will be zero if recovery is successful, i.e., a
+       collection is saved for each provided manifest.
+Options:
+`, prog)
+               flags.PrintDefaults()
+       }
+       loader.SetupFlags(flags)
+       loglevel := flags.String("log-level", "info", "logging level (debug, info, ...)")
+       err = flags.Parse(args)
+       if err == flag.ErrHelp {
+               err = nil
+               return 0
+       } else if err != nil {
+               return 2
+       }
+
+       if len(flags.Args()) == 0 {
+               flags.Usage()
+               return 2
+       }
+
+       lvl, err := logrus.ParseLevel(*loglevel)
+       if err != nil {
+               return 2
+       }
+       logger.SetLevel(lvl)
+
+       cfg, err := loader.Load()
+       if err != nil {
+               return 1
+       }
+       cluster, err := cfg.GetCluster("")
+       if err != nil {
+               return 1
+       }
+       client, err := arvados.NewClientFromConfig(cluster)
+       if err != nil {
+               return 1
+       }
+       client.AuthToken = cluster.SystemRootToken
+       rcvr := recoverer{
+               client:  client,
+               cluster: cluster,
+               logger:  logger,
+       }
+
+       exitcode := 0
+       for _, src := range flags.Args() {
+               logger := logger.WithField("src", src)
+               var mtxt string
+               if !strings.Contains(src, "/") && len(src) == 27 && src[5] == '-' && src[11] == '-' {
+                       var filters []arvados.Filter
+                       if src[5:12] == "-57u5n-" {
+                               filters = []arvados.Filter{{"uuid", "=", src}}
+                       } else if src[5:12] == "-4zz18-" {
+                               filters = []arvados.Filter{{"object_uuid", "=", src}}
+                       } else {
+                               logger.Error("looks like a UUID but not a log or collection UUID (if it's really a file, prepend './')")
+                               exitcode = 1
+                               continue
+                       }
+                       var resp struct {
+                               Items []struct {
+                                       UUID       string    `json:"uuid"`
+                                       EventType  string    `json:"event_type"`
+                                       EventAt    time.Time `json:"event_at"`
+                                       ObjectUUID string    `json:"object_uuid"`
+                                       Properties struct {
+                                               OldAttributes struct {
+                                                       ManifestText string `json:"manifest_text"`
+                                               } `json:"old_attributes"`
+                                       } `json:"properties"`
+                               }
+                       }
+                       err = client.RequestAndDecode(&resp, "GET", "arvados/v1/logs", nil, arvados.ListOptions{
+                               Limit:   1,
+                               Order:   []string{"event_at desc"},
+                               Filters: filters,
+                       })
+                       if err != nil {
+                               logger.WithError(err).Error("error looking up log entry")
+                               exitcode = 1
+                               continue
+                       } else if len(resp.Items) == 0 {
+                               logger.Error("log entry not found")
+                               exitcode = 1
+                               continue
+                       }
+                       logent := resp.Items[0]
+                       logger.WithFields(logrus.Fields{
+                               "uuid":                logent.UUID,
+                               "old_collection_uuid": logent.ObjectUUID,
+                               "logged_event_type":   logent.EventType,
+                               "logged_event_time":   logent.EventAt,
+                               "logged_object_uuid":  logent.ObjectUUID,
+                       }).Info("loaded log entry")
+                       mtxt = logent.Properties.OldAttributes.ManifestText
+                       if mtxt == "" {
+                               logger.Error("log entry properties.old_attributes.manifest_text missing or empty")
+                               exitcode = 1
+                               continue
+                       }
+               } else {
+                       buf, err := ioutil.ReadFile(src)
+                       if err != nil {
+                               logger.WithError(err).Error("failed to load manifest data from file")
+                               exitcode = 1
+                               continue
+                       }
+                       mtxt = string(buf)
+               }
+               uuid, err := rcvr.RecoverManifest(mtxt)
+               if err != nil {
+                       logger.WithError(err).Error("recovery failed")
+                       exitcode = 1
+                       continue
+               }
+               logger.WithField("UUID", uuid).Info("recovery succeeded")
+               fmt.Fprintln(stdout, uuid)
+       }
+       return exitcode
+}
+
+type recoverer struct {
+       client  *arvados.Client
+       cluster *arvados.Cluster
+       logger  logrus.FieldLogger
+}
+
+var errNotFound = errors.New("not found")
+
+// newestMtime finds the timestamp of the newest copy of blk on svc.
+// It returns errNotFound if blk is not on svc at all.
+func (rcvr recoverer) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
+       found, err := svc.Index(rcvr.client, blk)
+       if err != nil {
+               logger.WithError(err).Warn("error getting index")
+               return time.Time{}, err
+       } else if len(found) == 0 {
+               return time.Time{}, errNotFound
+       }
+       var latest time.Time
+       for _, ent := range found {
+               t := time.Unix(0, ent.Mtime)
+               if t.After(latest) {
+                       latest = t
+               }
+       }
+       logger.WithField("latest", latest).Debug("found")
+       return latest, nil
+}
+
+var errTouchIneffective = errors.New("(BUG?) touch succeeded but had no effect -- reported timestamp is still too old")
+
+// ensureSafe ensures the given block exists on the given server and
+// won't be eligible for trashing until after our chosen deadline
+// (blobsigexp). It returns an error if the block doesn't exist on
+// the given server, or has an old timestamp and can't be updated.
+//
+// After we decide a block is "safe" (whether or not we had to untrash
+// it), keep-balance might notice that it's currently unreferenced and
+// decide to trash it, all before our recovered collection gets
+// saved. But if the block's timestamp is less than blobsigttl old,
+// keepstore will refuse to trash it even if told to by keep-balance.
+func (rcvr recoverer) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
+       if latest, err := rcvr.newestMtime(logger, blk, svc); err != nil {
+               return err
+       } else if latest.Add(blobsigttl).After(blobsigexp) {
+               return nil
+       }
+       if err := svc.Touch(ctx, rcvr.client, blk); err != nil {
+               return fmt.Errorf("error updating timestamp: %s", err)
+       }
+       logger.Debug("updated timestamp")
+       if latest, err := rcvr.newestMtime(logger, blk, svc); err == errNotFound {
+               return fmt.Errorf("(BUG?) touch succeeded, but then block did not appear in index")
+       } else if err != nil {
+               return err
+       } else if latest.Add(blobsigttl).After(blobsigexp) {
+               return nil
+       } else {
+               return errTouchIneffective
+       }
+}
+
+// RecoverManifest untrashes and updates GC timestamps (as needed) on
+// blocks referenced by the given manifest, saves a new collection,
+// and returns the new collection's UUID.
+func (rcvr recoverer) RecoverManifest(mtxt string) (string, error) {
+       ctx, cancel := context.WithCancel(context.Background())
+       defer cancel()
+
+       coll := arvados.Collection{ManifestText: mtxt}
+       blks, err := coll.SizedDigests()
+       if err != nil {
+               return "", err
+       }
+       todo := make(chan int, len(blks))
+       for idx := range blks {
+               todo <- idx
+       }
+       go close(todo)
+
+       var services []arvados.KeepService
+       err = rcvr.client.EachKeepService(func(svc arvados.KeepService) error {
+               if svc.ServiceType == "proxy" {
+                       rcvr.logger.WithField("service", svc).Debug("ignore proxy service")
+               } else {
+                       services = append(services, svc)
+               }
+               return nil
+       })
+       if err != nil {
+               return "", fmt.Errorf("error getting list of keep services: %s", err)
+       }
+       rcvr.logger.WithField("services", services).Debug("got list of services")
+
+       // blobsigexp is our deadline for saving the rescued
+       // collection. This must be less than BlobSigningTTL
+       // (otherwise our rescued blocks could be garbage collected
+       // again before we protect them by saving the collection) but
+       // the exact value is somewhat arbitrary. If it's too soon, it
+       // will arrive before we're ready to save, and save will
+       // fail. If it's too late, we'll needlessly update timestamps
+       // on some blocks that were recently written/touched (e.g., by
+       // a previous attempt to rescue this same collection) and
+       // would have lived long enough anyway if left alone.
+       // BlobSigningTTL/2 (typically around 1 week) is much longer
+       // than we need to recover even a very large collection.
+       blobsigttl := rcvr.cluster.Collections.BlobSigningTTL.Duration()
+       blobsigexp := time.Now().Add(blobsigttl / 2)
+       rcvr.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
+
+       // We'll start a number of threads, each working on
+       // checking/recovering one block at a time. The threads
+       // themselves don't need much CPU/memory, but to avoid hitting
+       // limits on keepstore connections, backend storage bandwidth,
+       // etc., we limit concurrency to 2 per keepstore node.
+       workerThreads := 2 * len(services)
+
+       blkFound := make([]bool, len(blks))
+       var wg sync.WaitGroup
+       for i := 0; i < workerThreads; i++ {
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+               nextblk:
+                       for idx := range todo {
+                               blk := strings.SplitN(string(blks[idx]), "+", 2)[0]
+                               logger := rcvr.logger.WithField("block", blk)
+                               for _, untrashing := range []bool{false, true} {
+                                       for _, svc := range services {
+                                               logger := logger.WithField("service", fmt.Sprintf("%s:%d", svc.ServiceHost, svc.ServicePort))
+                                               if untrashing {
+                                                       if err := svc.Untrash(ctx, rcvr.client, blk); err != nil {
+                                                               logger.WithError(err).Debug("untrash failed")
+                                                               continue
+                                                       }
+                                                       logger.Info("untrashed")
+                                               }
+                                               err := rcvr.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
+                                               if err == errNotFound {
+                                                       logger.Debug(err)
+                                               } else if err != nil {
+                                                       logger.Error(err)
+                                               } else {
+                                                       blkFound[idx] = true
+                                                       continue nextblk
+                                               }
+                                       }
+                               }
+                               logger.Debug("unrecoverable")
+                       }
+               }()
+       }
+       wg.Wait()
+
+       var have, havenot int
+       for _, ok := range blkFound {
+               if ok {
+                       have++
+               } else {
+                       havenot++
+               }
+       }
+       if havenot > 0 {
+               if have > 0 {
+                       rcvr.logger.Warn("partial recovery is not implemented")
+               }
+               return "", fmt.Errorf("unable to recover %d of %d blocks", havenot, have+havenot)
+       }
+
+       if rcvr.cluster.Collections.BlobSigning {
+               key := []byte(rcvr.cluster.Collections.BlobSigningKey)
+               coll.ManifestText = arvados.SignManifest(coll.ManifestText, rcvr.client.AuthToken, blobsigexp, blobsigttl, key)
+       }
+       rcvr.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
+       err = rcvr.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
+               "collection": map[string]interface{}{
+                       "manifest_text": coll.ManifestText,
+               },
+       })
+       if err != nil {
+               return "", fmt.Errorf("error saving new collection: %s", err)
+       }
+       rcvr.logger.WithField("UUID", coll.UUID).Debug("created new collection")
+       return coll.UUID, nil
+}
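
The concurrency pattern at the heart of RecoverManifest is worth seeing in isolation: a buffered channel is pre-loaded with all work items and closed, then drained by a fixed pool of 2*len(services) goroutines. A minimal standalone sketch of the same pattern, using made-up service and block names:

    package main

    import (
        "fmt"
        "sync"
    )

    func main() {
        services := []string{"keep0:25107", "keep1:25107"} // hypothetical
        blocks := []string{"blk-a", "blk-b", "blk-c", "blk-d", "blk-e"}

        // Queue every work item in a buffered channel, then close it so
        // the range loops below terminate once the work runs out.
        todo := make(chan int, len(blocks))
        for idx := range blocks {
            todo <- idx
        }
        close(todo)

        // Cap concurrency at 2 goroutines per keepstore service, as
        // RecoverManifest does, to avoid exhausting connections.
        var wg sync.WaitGroup
        for i := 0; i < 2*len(services); i++ {
            wg.Add(1)
            go func(worker int) {
                defer wg.Done()
                for idx := range todo {
                    fmt.Printf("worker %d checking %s\n", worker, blocks[idx])
                }
            }(i)
        }
        wg.Wait()
    }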
diff --git a/lib/recovercollection/cmd_test.go b/lib/recovercollection/cmd_test.go
new file mode 100644 (file)
index 0000000..57c2c64
--- /dev/null
@@ -0,0 +1,136 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package recovercollection
+
+import (
+       "bytes"
+       "encoding/json"
+       "io/ioutil"
+       "os"
+       "testing"
+       "time"
+
+       "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
+var _ = check.Suite(&Suite{})
+
+type Suite struct{}
+
+func (*Suite) SetUpSuite(c *check.C) {
+       arvadostest.StartAPI()
+       arvadostest.StartKeep(2, true)
+}
+
+func (*Suite) TestUnrecoverableBlock(c *check.C) {
+       tmp := c.MkDir()
+       mfile := tmp + "/manifest"
+       ioutil.WriteFile(mfile, []byte(". aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+410 0:410:Gone\n"), 0777)
+       var stdout, stderr bytes.Buffer
+       exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 1)
+       c.Check(stdout.String(), check.Equals, "")
+       c.Log(stderr.String())
+       c.Check(stderr.String(), check.Matches, `(?ms).*msg="not found" block=aaaaa.*`)
+       c.Check(stderr.String(), check.Matches, `(?ms).*msg="untrash failed" block=aaaaa.*`)
+       c.Check(stderr.String(), check.Matches, `(?ms).*msg=unrecoverable block=aaaaa.*`)
+       c.Check(stderr.String(), check.Matches, `(?ms).*msg="recovery failed".*`)
+}
+
+func (*Suite) TestUntrashAndTouchBlock(c *check.C) {
+       tmp := c.MkDir()
+       mfile := tmp + "/manifest"
+       ioutil.WriteFile(mfile, []byte(". dcd0348cb2532ee90c99f1b846efaee7+13 0:13:test.txt\n"), 0777)
+
+       logger := ctxlog.TestLogger(c)
+       loader := config.NewLoader(&bytes.Buffer{}, logger)
+       cfg, err := loader.Load()
+       c.Assert(err, check.IsNil)
+       cluster, err := cfg.GetCluster("")
+       c.Assert(err, check.IsNil)
+       var datadirs []string
+       for _, v := range cluster.Volumes {
+               var params struct {
+                       Root string
+               }
+               err := json.Unmarshal(v.DriverParameters, &params)
+               c.Assert(err, check.IsNil)
+               if params.Root != "" {
+                       datadirs = append(datadirs, params.Root)
+                       err := os.Remove(params.Root + "/dcd/dcd0348cb2532ee90c99f1b846efaee7")
+                       if err != nil && !os.IsNotExist(err) {
+                               c.Error(err)
+                       }
+               }
+       }
+       c.Logf("keepstore datadirs are %q", datadirs)
+
+       // Currently StartKeep(2, true) uses dirs called "keep0" and
+       // "keep1" so we could just put our fake trashed file in keep0
+       // ... but we don't want to rely on arvadostest's
+       // implementation details, so we put a trashed file in every
+       // dir that keepstore might be using.
+       for _, datadir := range datadirs {
+               if fi, err := os.Stat(datadir); err != nil || !fi.IsDir() {
+                       continue
+               }
+               c.Logf("placing backdated trashed block in datadir %q", datadir)
+               trashfile := datadir + "/dcd/dcd0348cb2532ee90c99f1b846efaee7.trash.999999999"
+               os.Mkdir(datadir+"/dcd", 0777)
+               err = ioutil.WriteFile(trashfile, []byte("undelete test"), 0777)
+               c.Assert(err, check.IsNil)
+               t := time.Now().Add(-time.Hour * 24 * 365)
+               err = os.Chtimes(trashfile, t, t)
+               c.Assert(err, check.IsNil)
+       }
+
+       var stdout, stderr bytes.Buffer
+       exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
+       c.Check(exitcode, check.Equals, 0)
+       c.Check(stdout.String(), check.Matches, `zzzzz-4zz18-.{15}\n`)
+       c.Log(stderr.String())
+       c.Check(stderr.String(), check.Matches, `(?ms).*msg=untrashed block=dcd0348.*`)
+       c.Check(stderr.String(), check.Matches, `(?ms).*msg="updated timestamp" block=dcd0348.*`)
+
+       found := false
+       for _, datadir := range datadirs {
+               buf, err := ioutil.ReadFile(datadir + "/dcd/dcd0348cb2532ee90c99f1b846efaee7")
+               if err == nil {
+                       found = true
+                       c.Check(buf, check.DeepEquals, []byte("undelete test"))
+                       fi, err := os.Stat(datadir + "/dcd/dcd0348cb2532ee90c99f1b846efaee7")
+                       if c.Check(err, check.IsNil) {
+                               c.Logf("recovered block's modtime is %s", fi.ModTime())
+                               c.Check(time.Now().Sub(fi.ModTime()) < time.Hour, check.Equals, true)
+                       }
+               }
+       }
+       c.Check(found, check.Equals, true)
+}
+
+func (*Suite) TestUnusableManifestSourceArg(c *check.C) {
+       for _, trial := range []struct {
+               srcArg    string
+               errRegexp string
+       }{
+               {"zzzzz-4zz18-aaaaaaaaaaaaaaa", `(?ms).*msg="log entry not found".*`},
+               {"zzzzz-57u5n-aaaaaaaaaaaaaaa", `(?ms).*msg="log entry not found.*`},
+               {"zzzzz-57u5n-containerlog006", `(?ms).*msg="log entry properties\.old_attributes\.manifest_text missing or empty".*`},
+               {"zzzzz-j7d0g-aaaaaaaaaaaaaaa", `(?ms).*msg="looks like a UUID but not a log or collection UUID.*`},
+       } {
+               var stdout, stderr bytes.Buffer
+               exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", trial.srcArg}, &bytes.Buffer{}, &stdout, &stderr)
+               c.Check(exitcode, check.Equals, 1)
+               c.Check(stdout.String(), check.Equals, "")
+               c.Log(stderr.String())
+               c.Check(stderr.String(), check.Matches, trial.errRegexp)
+       }
+}
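
The fixture setup in TestUntrashAndTouchBlock depends on keepstore's on-disk layout: a block lives at <datadir>/<first three hex digits>/<hash>, and a trashed copy carries a ".trash.<deadline>" suffix, where the deadline is the Unix time after which the trash may be deleted permanently. A short sketch of that path construction (the helper name and datadir are hypothetical; the naming convention is the one the test exercises):

    package main

    import (
        "fmt"
        "path/filepath"
        "time"
    )

    // trashPath reproduces the trash-file naming scheme used in the
    // test above: blocks are sharded by the first three hex digits of
    // the hash, and a ".trash.<unix seconds>" suffix marks the deletion
    // deadline. (Hypothetical helper, not part of the SDK.)
    func trashPath(datadir, hash string, deadline time.Time) string {
        return filepath.Join(datadir, hash[:3],
            fmt.Sprintf("%s.trash.%d", hash, deadline.Unix()))
    }

    func main() {
        fmt.Println(trashPath("/tmp/keep0",
            "dcd0348cb2532ee90c99f1b846efaee7", time.Unix(999999999, 0)))
        // /tmp/keep0/dcd/dcd0348cb2532ee90c99f1b846efaee7.trash.999999999
    }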
diff --git a/sdk/go/arvados/blob_signature.go b/sdk/go/arvados/blob_signature.go
new file mode 100644 (file)
index 0000000..1329395
--- /dev/null
@@ -0,0 +1,126 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+// Generate and verify permission signatures for Keep locators.
+//
+// See https://dev.arvados.org/projects/arvados/wiki/Keep_locator_format
+
+package arvados
+
+import (
+       "crypto/hmac"
+       "crypto/sha1"
+       "errors"
+       "fmt"
+       "regexp"
+       "strconv"
+       "strings"
+       "time"
+)
+
+var (
+       // ErrSignatureExpired - a signature was rejected because the
+       // expiry time has passed.
+       ErrSignatureExpired = errors.New("Signature expired")
+       // ErrSignatureInvalid - a signature was rejected because it
+       // was badly formatted or did not match the given secret key.
+       ErrSignatureInvalid = errors.New("Invalid signature")
+       // ErrSignatureMissing - the given locator does not have a
+       // signature hint.
+       ErrSignatureMissing = errors.New("Missing signature")
+)
+
+// makePermSignature generates a SHA-1 HMAC digest for the given blob,
+// token, expiry, and site secret.
+func makePermSignature(blobHash, apiToken, expiry, blobSignatureTTL string, permissionSecret []byte) string {
+       hmac := hmac.New(sha1.New, permissionSecret)
+       hmac.Write([]byte(blobHash))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(apiToken))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(expiry))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(blobSignatureTTL))
+       digest := hmac.Sum(nil)
+       return fmt.Sprintf("%x", digest)
+}
+
+var (
+       mBlkRe      = regexp.MustCompile(`^[0-9a-f]{32}.*`)
+       mPermHintRe = regexp.MustCompile(`\+A[^+]*`)
+)
+
+// SignManifest signs all locators in the given manifest, discarding
+// any existing signatures.
+func SignManifest(manifest string, apiToken string, expiry time.Time, ttl time.Duration, permissionSecret []byte) string {
+       return regexp.MustCompile(`\S+`).ReplaceAllStringFunc(manifest, func(tok string) string {
+               if mBlkRe.MatchString(tok) {
+                       return SignLocator(mPermHintRe.ReplaceAllString(tok, ""), apiToken, expiry, ttl, permissionSecret)
+               } else {
+                       return tok
+               }
+       })
+}
+
+// SignLocator returns blobLocator with a permission signature
+// added. If either permissionSecret or apiToken is empty, blobLocator
+// is returned untouched.
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func SignLocator(blobLocator, apiToken string, expiry time.Time, blobSignatureTTL time.Duration, permissionSecret []byte) string {
+       if len(permissionSecret) == 0 || apiToken == "" {
+               return blobLocator
+       }
+       // Strip off all hints: only the hash is used to sign.
+       blobHash := strings.Split(blobLocator, "+")[0]
+       timestampHex := fmt.Sprintf("%08x", expiry.Unix())
+       blobSignatureTTLHex := strconv.FormatInt(int64(blobSignatureTTL.Seconds()), 16)
+       return blobLocator +
+               "+A" + makePermSignature(blobHash, apiToken, timestampHex, blobSignatureTTLHex, permissionSecret) +
+               "@" + timestampHex
+}
+
+var SignedLocatorRe = regexp.MustCompile(
+       //1                 2          34                         5   6                  7                 89
+       `^([[:xdigit:]]{32})(\+[0-9]+)?((\+[B-Z][A-Za-z0-9@_-]*)*)(\+A([[:xdigit:]]{40})@([[:xdigit:]]{8}))((\+[B-Z][A-Za-z0-9@_-]*)*)$`)
+
+// VerifySignature returns nil if the signature on the signedLocator
+// can be verified using the given apiToken. Otherwise it returns
+// ErrSignatureExpired (if the signature's expiry time has passed,
+// which is something the client could have figured out
+// independently), ErrSignatureMissing (if there is no signature hint
+// at all), or ErrSignatureInvalid (if the signature is present but
+// badly formatted or incorrect).
+//
+// This function is intended to be used by system components and admin
+// utilities: userland programs do not know the permissionSecret.
+func VerifySignature(signedLocator, apiToken string, blobSignatureTTL time.Duration, permissionSecret []byte) error {
+       matches := SignedLocatorRe.FindStringSubmatch(signedLocator)
+       if matches == nil {
+               return ErrSignatureMissing
+       }
+       blobHash := matches[1]
+       signatureHex := matches[6]
+       expiryHex := matches[7]
+       if expiryTime, err := parseHexTimestamp(expiryHex); err != nil {
+               return ErrSignatureInvalid
+       } else if expiryTime.Before(time.Now()) {
+               return ErrSignatureExpired
+       }
+       blobSignatureTTLHex := strconv.FormatInt(int64(blobSignatureTTL.Seconds()), 16)
+       if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, blobSignatureTTLHex, permissionSecret) {
+               return ErrSignatureInvalid
+       }
+       return nil
+}
+
+func parseHexTimestamp(timestampHex string) (ts time.Time, err error) {
+       if tsInt, e := strconv.ParseInt(timestampHex, 16, 0); e == nil {
+               ts = time.Unix(tsInt, 0)
+       } else {
+               err = e
+       }
+       return ts, err
+}
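
The exported surface of the new file is small enough to demonstrate end to end. A sketch of a sign-and-verify round trip using the functions defined above (the token, secret, and TTL are made-up values; a real deployment uses Collections.BlobSigningKey and a genuine API token):

    package main

    import (
        "fmt"
        "time"

        "git.arvados.org/arvados.git/sdk/go/arvados"
    )

    func main() {
        secret := []byte("example-blob-signing-key") // hypothetical
        token := "example-api-token"                 // hypothetical
        ttl := 14 * 24 * time.Hour                   // a typical BlobSigningTTL

        locator := "acbd18db4cc2f85cedef654fccc4a4d8+3"
        signed := arvados.SignLocator(locator, token, time.Now().Add(ttl/2), ttl, secret)
        fmt.Println(signed) // locator + "+A<40 hex digits>@<8 hex digits>"

        // VerifySignature returns nil on success, or one of the exported
        // ErrSignature* values explaining why the locator was rejected.
        if err := arvados.VerifySignature(signed, token, ttl, secret); err != nil {
            fmt.Println("rejected:", err)
        } else {
            fmt.Println("signature OK")
        }
    }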
diff --git a/sdk/go/arvados/blob_signature_test.go b/sdk/go/arvados/blob_signature_test.go
new file mode 100644 (file)
index 0000000..847f9a8
--- /dev/null
@@ -0,0 +1,88 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+       "time"
+
+       check "gopkg.in/check.v1"
+)
+
+const (
+       knownHash    = "acbd18db4cc2f85cedef654fccc4a4d8"
+       knownLocator = knownHash + "+3"
+       knownToken   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
+       knownKey     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
+               "p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
+               "ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
+               "jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
+               "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
+               "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
+               "786u5rw2a9gx743dj3fgq2irk"
+       knownSignature     = "89118b78732c33104a4d6231e8b5a5fa1e4301e3"
+       knownTimestamp     = "7fffffff"
+       knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
+       knownSignedLocator = knownLocator + knownSigHint
+       blobSignatureTTL   = 1209600 * time.Second
+)
+
+var _ = check.Suite(&BlobSignatureSuite{})
+
+type BlobSignatureSuite struct{}
+
+func (s *BlobSignatureSuite) TestSignLocator(c *check.C) {
+       ts, err := parseHexTimestamp(knownTimestamp)
+       c.Check(err, check.IsNil)
+       c.Check(SignLocator(knownLocator, knownToken, ts, blobSignatureTTL, []byte(knownKey)), check.Equals, knownSignedLocator)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignature(c *check.C) {
+       c.Check(VerifySignature(knownSignedLocator, knownToken, blobSignatureTTL, []byte(knownKey)), check.IsNil)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignatureExtraHints(c *check.C) {
+       // handle hint before permission signature
+       c.Check(VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)), check.IsNil)
+
+       // handle hint after permission signature
+       c.Check(VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, blobSignatureTTL, []byte(knownKey)), check.IsNil)
+
+       // handle hints around permission signature
+       c.Check(VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, blobSignatureTTL, []byte(knownKey)), check.IsNil)
+}
+
+// The size hint on the locator string should not affect signature
+// validation.
+func (s *BlobSignatureSuite) TestVerifySignatureWrongSize(c *check.C) {
+       // handle incorrect size hint
+       c.Check(VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)), check.IsNil)
+
+       // handle missing size hint
+       c.Check(VerifySignature(knownHash+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)), check.IsNil)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignatureBadSig(c *check.C) {
+       badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
+       c.Check(VerifySignature(badLocator, knownToken, blobSignatureTTL, []byte(knownKey)), check.Equals, ErrSignatureMissing)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignatureBadTimestamp(c *check.C) {
+       badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
+       c.Check(VerifySignature(badLocator, knownToken, blobSignatureTTL, []byte(knownKey)), check.Equals, ErrSignatureMissing)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignatureBadSecret(c *check.C) {
+       c.Check(VerifySignature(knownSignedLocator, knownToken, blobSignatureTTL, []byte("00000000000000000000")), check.Equals, ErrSignatureInvalid)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignatureBadToken(c *check.C) {
+       c.Check(VerifySignature(knownSignedLocator, "00000000", blobSignatureTTL, []byte(knownKey)), check.Equals, ErrSignatureInvalid)
+}
+
+func (s *BlobSignatureSuite) TestVerifySignatureExpired(c *check.C) {
+       yesterday := time.Now().AddDate(0, 0, -1)
+       expiredLocator := SignLocator(knownHash, knownToken, yesterday, blobSignatureTTL, []byte(knownKey))
+       c.Check(VerifySignature(expiredLocator, knownToken, blobSignatureTTL, []byte(knownKey)), check.Equals, ErrSignatureExpired)
+}
index 1efc87ea72ac6f67496e0b4df931905092f2c6fa..029e223218b2a5136b8eac2238b088e2ce4fb983 100644 (file)
@@ -156,6 +156,15 @@ type Cluster struct {
                        ClientSecret            string
                        AlternateEmailAddresses bool
                }
+               OpenIDConnect struct {
+                       Enable             bool
+                       Issuer             string
+                       ClientID           string
+                       ClientSecret       string
+                       EmailClaim         string
+                       EmailVerifiedClaim string
+                       UsernameClaim      string
+               }
                PAM struct {
                        Enable             bool
                        Service            string
index a7edec76dc3c2f77bae447d24585927469c89c77..3d08f2235a0c488c902b6e6d3b0ccce273ea6690 100644 (file)
@@ -108,6 +108,14 @@ type ContainerList struct {
        Limit          int         `json:"limit"`
 }
 
+// ContainerRequestList is an arvados#containerRequestList resource.
+type ContainerRequestList struct {
+       Items          []ContainerRequest `json:"items"`
+       ItemsAvailable int                `json:"items_available"`
+       Offset         int                `json:"offset"`
+       Limit          int                `json:"limit"`
+}
+
 // ContainerState is a string corresponding to a valid Container state.
 type ContainerState string
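
ContainerRequestList has the same shape as the other arvados#...List types, so it drops straight into Client.RequestAndDecode. A hedged sketch of listing container requests (the endpoint path is assumed by analogy with the arvados/v1/logs request in recovercollection):

    package main

    import (
        "fmt"
        "log"

        "git.arvados.org/arvados.git/sdk/go/arvados"
    )

    func main() {
        // Client configured from ARVADOS_API_HOST / ARVADOS_API_TOKEN.
        client := arvados.NewClientFromEnv()

        var list arvados.ContainerRequestList
        err := client.RequestAndDecode(&list, "GET",
            "arvados/v1/container_requests", nil, arvados.ListOptions{Limit: 10})
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("showing %d of %d container requests\n",
            len(list.Items), list.ItemsAvailable)
        for _, cr := range list.Items {
            fmt.Println(cr.UUID, cr.State)
        }
    }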
 
index 97a62fa7bb3933b89e83e428fc4da39de7453fcd..3af7479202efb46df838e48246ccf9c2f0b5b100 100644 (file)
@@ -6,7 +6,9 @@ package arvados
 
 import (
        "bufio"
+       "context"
        "fmt"
+       "io/ioutil"
        "net/http"
        "strconv"
        "strings"
@@ -102,6 +104,42 @@ func (s *KeepService) Mounts(c *Client) ([]KeepMount, error) {
        return mounts, nil
 }
 
+// Touch updates the timestamp on the given block.
+func (s *KeepService) Touch(ctx context.Context, c *Client, blk string) error {
+       req, err := http.NewRequest("TOUCH", s.url(blk), nil)
+       if err != nil {
+               return err
+       }
+       resp, err := c.Do(req.WithContext(ctx))
+       if err != nil {
+               return err
+       }
+       defer resp.Body.Close()
+       if resp.StatusCode != http.StatusOK {
+               body, _ := ioutil.ReadAll(resp.Body)
+               return fmt.Errorf("%s %s: %s", resp.Proto, resp.Status, body)
+       }
+       return nil
+}
+
+// Untrash moves/copies the given block out of trash.
+func (s *KeepService) Untrash(ctx context.Context, c *Client, blk string) error {
+       req, err := http.NewRequest("PUT", s.url("untrash/"+blk), nil)
+       if err != nil {
+               return err
+       }
+       resp, err := c.Do(req.WithContext(ctx))
+       if err != nil {
+               return err
+       }
+       defer resp.Body.Close()
+       if resp.StatusCode != http.StatusOK {
+               body, _ := ioutil.ReadAll(resp.Body)
+               return fmt.Errorf("%s %s: %s", resp.Proto, resp.Status, body)
+       }
+       return nil
+}
+
 // IndexMount returns an unsorted list of blocks at the given mount point.
 func (s *KeepService) IndexMount(c *Client, mountUUID string, prefix string) ([]KeepServiceIndexEntry, error) {
        return s.index(c, s.url("mounts/"+mountUUID+"/blocks?prefix="+prefix))
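
Untrash followed by Touch is the per-block rescue sequence recovercollection drives against every keepstore. A sketch of that sequence as a single helper (the function itself is hypothetical; Untrash and Touch are the methods added above):

    package recoverexample

    import (
        "context"
        "fmt"

        "git.arvados.org/arvados.git/sdk/go/arvados"
    )

    // rescueBlock untrashes blk on svc if necessary, then bumps its
    // timestamp so it survives until a collection referencing it can
    // be saved. An untrash failure is tolerated because the block may
    // never have been trashed in the first place.
    func rescueBlock(ctx context.Context, c *arvados.Client, svc arvados.KeepService, blk string) error {
        if err := svc.Untrash(ctx, c, blk); err != nil {
            fmt.Printf("untrash %s on %s: %v (continuing)\n", blk, svc.ServiceHost, err)
        }
        return svc.Touch(ctx, c, blk)
    }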
index a77983322d618158f0eb8ff6ba6af47814bb894d..23ca7d2f2b43cf778ce16c22f34fa20bd524f091 100644 (file)
 //
 // SPDX-License-Identifier: Apache-2.0
 
-// Generate and verify permission signatures for Keep locators.
-//
-// See https://dev.arvados.org/projects/arvados/wiki/Keep_locator_format
-
 package keepclient
 
-import (
-       "crypto/hmac"
-       "crypto/sha1"
-       "errors"
-       "fmt"
-       "regexp"
-       "strconv"
-       "strings"
-       "time"
-)
+import "git.arvados.org/arvados.git/sdk/go/arvados"
 
 var (
-       // ErrSignatureExpired - a signature was rejected because the
-       // expiry time has passed.
-       ErrSignatureExpired = errors.New("Signature expired")
-       // ErrSignatureInvalid - a signature was rejected because it
-       // was badly formatted or did not match the given secret key.
-       ErrSignatureInvalid = errors.New("Invalid signature")
-       // ErrSignatureMissing - the given locator does not have a
-       // signature hint.
-       ErrSignatureMissing = errors.New("Missing signature")
+       ErrSignatureExpired = arvados.ErrSignatureExpired
+       ErrSignatureInvalid = arvados.ErrSignatureInvalid
+       ErrSignatureMissing = arvados.ErrSignatureMissing
+       SignLocator         = arvados.SignLocator
+       SignedLocatorRe     = arvados.SignedLocatorRe
+       VerifySignature     = arvados.VerifySignature
 )
-
-// makePermSignature generates a SHA-1 HMAC digest for the given blob,
-// token, expiry, and site secret.
-func makePermSignature(blobHash, apiToken, expiry, blobSignatureTTL string, permissionSecret []byte) string {
-       hmac := hmac.New(sha1.New, permissionSecret)
-       hmac.Write([]byte(blobHash))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(apiToken))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(expiry))
-       hmac.Write([]byte("@"))
-       hmac.Write([]byte(blobSignatureTTL))
-       digest := hmac.Sum(nil)
-       return fmt.Sprintf("%x", digest)
-}
-
-// SignLocator returns blobLocator with a permission signature
-// added. If either permissionSecret or apiToken is empty, blobLocator
-// is returned untouched.
-//
-// This function is intended to be used by system components and admin
-// utilities: userland programs do not know the permissionSecret.
-func SignLocator(blobLocator, apiToken string, expiry time.Time, blobSignatureTTL time.Duration, permissionSecret []byte) string {
-       if len(permissionSecret) == 0 || apiToken == "" {
-               return blobLocator
-       }
-       // Strip off all hints: only the hash is used to sign.
-       blobHash := strings.Split(blobLocator, "+")[0]
-       timestampHex := fmt.Sprintf("%08x", expiry.Unix())
-       blobSignatureTTLHex := strconv.FormatInt(int64(blobSignatureTTL.Seconds()), 16)
-       return blobLocator +
-               "+A" + makePermSignature(blobHash, apiToken, timestampHex, blobSignatureTTLHex, permissionSecret) +
-               "@" + timestampHex
-}
-
-var SignedLocatorRe = regexp.MustCompile(
-       //1                 2          34                         5   6                  7                 89
-       `^([[:xdigit:]]{32})(\+[0-9]+)?((\+[B-Z][A-Za-z0-9@_-]*)*)(\+A([[:xdigit:]]{40})@([[:xdigit:]]{8}))((\+[B-Z][A-Za-z0-9@_-]*)*)$`)
-
-// VerifySignature returns nil if the signature on the signedLocator
-// can be verified using the given apiToken. Otherwise it returns
-// ErrSignatureExpired (if the signature's expiry time has passed,
-// which is something the client could have figured out
-// independently), ErrSignatureMissing (if there is no signature hint
-// at all), or ErrSignatureInvalid (if the signature is present but
-// badly formatted or incorrect).
-//
-// This function is intended to be used by system components and admin
-// utilities: userland programs do not know the permissionSecret.
-func VerifySignature(signedLocator, apiToken string, blobSignatureTTL time.Duration, permissionSecret []byte) error {
-       matches := SignedLocatorRe.FindStringSubmatch(signedLocator)
-       if matches == nil {
-               return ErrSignatureMissing
-       }
-       blobHash := matches[1]
-       signatureHex := matches[6]
-       expiryHex := matches[7]
-       if expiryTime, err := parseHexTimestamp(expiryHex); err != nil {
-               return ErrSignatureInvalid
-       } else if expiryTime.Before(time.Now()) {
-               return ErrSignatureExpired
-       }
-       blobSignatureTTLHex := strconv.FormatInt(int64(blobSignatureTTL.Seconds()), 16)
-       if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, blobSignatureTTLHex, permissionSecret) {
-               return ErrSignatureInvalid
-       }
-       return nil
-}
-
-func parseHexTimestamp(timestampHex string) (ts time.Time, err error) {
-       if tsInt, e := strconv.ParseInt(timestampHex, 16, 0); e == nil {
-               ts = time.Unix(tsInt, 0)
-       } else {
-               err = e
-       }
-       return ts, err
-}
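
The rewrite keeps the keepclient package's public API intact by re-exporting the moved symbols, so existing imports keep compiling unchanged. A quick check that the names really are the same values:

    package main

    import (
        "fmt"

        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/keepclient"
    )

    func main() {
        // The keepclient names are aliases for the arvados package's
        // values, so the sentinel errors (and the shared regexp pointer)
        // compare equal across the two import paths.
        fmt.Println(keepclient.ErrSignatureExpired == arvados.ErrSignatureExpired) // true
        fmt.Println(keepclient.ErrSignatureInvalid == arvados.ErrSignatureInvalid) // true
        fmt.Println(keepclient.SignedLocatorRe == arvados.SignedLocatorRe)         // true
    }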
diff --git a/sdk/go/keepclient/perms_test.go b/sdk/go/keepclient/perms_test.go
deleted file mode 100644 (file)
index f8107f4..0000000
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package keepclient
-
-import (
-       "testing"
-       "time"
-)
-
-const (
-       knownHash    = "acbd18db4cc2f85cedef654fccc4a4d8"
-       knownLocator = knownHash + "+3"
-       knownToken   = "hocfupkn2pjhrpgp2vxv8rsku7tvtx49arbc9s4bvu7p7wxqvk"
-       knownKey     = "13u9fkuccnboeewr0ne3mvapk28epf68a3bhj9q8sb4l6e4e5mkk" +
-               "p6nhj2mmpscgu1zze5h5enydxfe3j215024u16ij4hjaiqs5u4pzsl3nczmaoxnc" +
-               "ljkm4875xqn4xv058koz3vkptmzhyheiy6wzevzjmdvxhvcqsvr5abhl15c2d4o4" +
-               "jhl0s91lojy1mtrzqqvprqcverls0xvy9vai9t1l1lvvazpuadafm71jl4mrwq2y" +
-               "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
-               "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
-               "786u5rw2a9gx743dj3fgq2irk"
-       knownSignature     = "89118b78732c33104a4d6231e8b5a5fa1e4301e3"
-       knownTimestamp     = "7fffffff"
-       knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
-       knownSignedLocator = knownLocator + knownSigHint
-       blobSignatureTTL   = 1209600 * time.Second
-)
-
-func TestSignLocator(t *testing.T) {
-       if ts, err := parseHexTimestamp(knownTimestamp); err != nil {
-               t.Errorf("bad knownTimestamp %s", knownTimestamp)
-       } else {
-               if knownSignedLocator != SignLocator(knownLocator, knownToken, ts, blobSignatureTTL, []byte(knownKey)) {
-                       t.Fail()
-               }
-       }
-}
-
-func TestVerifySignature(t *testing.T) {
-       if VerifySignature(knownSignedLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureExtraHints(t *testing.T) {
-       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
-               t.Fatal("Verify cannot handle hint before permission signature")
-       }
-
-       if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
-               t.Fatal("Verify cannot handle hint after permission signature")
-       }
-
-       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
-               t.Fatal("Verify cannot handle hints around permission signature")
-       }
-}
-
-// The size hint on the locator string should not affect signature validation.
-func TestVerifySignatureWrongSize(t *testing.T) {
-       if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
-               t.Fatal("Verify cannot handle incorrect size hint")
-       }
-
-       if VerifySignature(knownHash+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
-               t.Fatal("Verify cannot handle missing size hint")
-       }
-}
-
-func TestVerifySignatureBadSig(t *testing.T) {
-       badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
-       if VerifySignature(badLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != ErrSignatureMissing {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureBadTimestamp(t *testing.T) {
-       badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
-       if VerifySignature(badLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != ErrSignatureMissing {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureBadSecret(t *testing.T) {
-       if VerifySignature(knownSignedLocator, knownToken, blobSignatureTTL, []byte("00000000000000000000")) != ErrSignatureInvalid {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureBadToken(t *testing.T) {
-       if VerifySignature(knownSignedLocator, "00000000", blobSignatureTTL, []byte(knownKey)) != ErrSignatureInvalid {
-               t.Fail()
-       }
-}
-
-func TestVerifySignatureExpired(t *testing.T) {
-       yesterday := time.Now().AddDate(0, 0, -1)
-       expiredLocator := SignLocator(knownHash, knownToken, yesterday, blobSignatureTTL, []byte(knownKey))
-       if VerifySignature(expiredLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != ErrSignatureExpired {
-               t.Fail()
-       }
-}
index a726b49fe3814a7d51d7fcb32420ad98abb4150d..589533177a4b83b5c481e2ff122b7594d536133a 100644 (file)
@@ -54,6 +54,7 @@ setup(name='arvados-python-client',
           'ruamel.yaml >=0.15.54, <=0.16.5',
           'setuptools',
           'ws4py >=0.4.2',
+          'rsa < 4.1'
       ],
       extras_require={
           ':os.name=="posix" and python_version<"3"': ['subprocess32 >= 3.5.1'],
index 9f3a5fb2b3d787e1a320d8dc3734126ac1ee33f4..c8a1a27b79c1e939438dcb8ed603c206a299c409 100644 (file)
@@ -273,7 +273,7 @@ GEM
       json (>= 1.8.0)
     websocket-driver (0.6.5)
       websocket-extensions (>= 0.1.0)
-    websocket-extensions (0.1.3)
+    websocket-extensions (0.1.5)
 
 PLATFORMS
   ruby
index d6045a5dcbf35a3c786bb6db5105d49e9636cc39..5c4cf7bc16c22ad8d8780714d9b0165cf2c4043b 100644 (file)
@@ -75,9 +75,10 @@ class DatabaseController < ApplicationController
       raise
     end
 
-    require 'refresh_permission_view'
+    require 'update_permissions'
 
-    refresh_permission_view
+    refresh_permissions
+    refresh_trashed
 
     # Done.
     send_json success: true
index 816dbf4758dd0baa6e4ca438434b0b770fd1c0b7..8afebfb79eab56e24e83394f3923469d28faba94 100644 (file)
@@ -285,10 +285,13 @@ class ArvadosModel < ApplicationRecord
     sql_conds = nil
     user_uuids = users_list.map { |u| u.uuid }
 
+    # For details on how the trashed_groups table is constructed,
+    # see db/migrate/20200501150153_permission_table.rb
+
     exclude_trashed_records = ""
     if !include_trash and (sql_table == "groups" or sql_table == "collections") then
-      # Only include records that are not explicitly trashed
-      exclude_trashed_records = "AND #{sql_table}.is_trashed = false"
+      # Only include records that are not trashed
+      exclude_trashed_records = "AND (#{sql_table}.trash_at is NULL or #{sql_table}.trash_at > statement_timestamp())"
     end
 
     if users_list.select { |u| u.is_admin }.any?
@@ -296,16 +299,28 @@ class ArvadosModel < ApplicationRecord
       if !include_trash
         if sql_table != "api_client_authorizations"
           # Only include records where the owner is not trashed
-          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-                      "WHERE trashed = 1) #{exclude_trashed_records}"
+          sql_conds = "#{sql_table}.owner_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} "+
+                      "where trash_at <= statement_timestamp()) #{exclude_trashed_records}"
         end
       end
     else
       trashed_check = ""
       if !include_trash then
-        trashed_check = "AND trashed = 0"
+        trashed_check = "AND target_uuid NOT IN (SELECT group_uuid FROM #{TRASHED_GROUPS} where trash_at <= statement_timestamp())"
       end
 
+      # The core of the permission check is a join against the
+      # materialized_permissions table to determine if the user has at
+      # least read permission to either the object itself or its
+      # direct owner (if traverse_owned is true).  See
+      # db/migrate/20200501150153_permission_table.rb for details on
+      # how the permissions are computed.
+
+      # A user can have can_manage access to another user; this grants
+      # full access to everything that user owns.  To implement that, we
+      # need to include those other users in the permission query.
+      user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: ":user_uuids", perm_level: 1}
+
       # Note: it is possible to combine the direct_check and
       # owner_check into a single EXISTS() clause, however it turns
       # out the query optimizer doesn't like it and forces a sequential
@@ -316,13 +331,28 @@ class ArvadosModel < ApplicationRecord
 
       # Match a direct read permission link from the user to the record uuid
       direct_check = "#{sql_table}.uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-                     "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check})"
+                     "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check})"
 
-      # Match a read permission link from the user to the record's owner_uuid
+      # Match a read permission for the user to the record's
+      # owner_uuid.  This is so we can have a permissions table that
+      # mostly consists of users and groups (projects are a type of
+      # group) and not have to compute and list user permissions for
+      # every single object in the system.
+      #
+      # Don't do this for API keys (special behavior) or groups
+      # (already covered by direct_check).
+      #
+      # The traverse_owned flag indicates whether the permission to
+      # read an object also implies transitive permission to read
+      # things the object owns.  A case where this matters is
+      # determining whether we can read an object owned by another
+      # user.  This makes it possible to have permission to read the
+      # user record without granting permission to read things the
+      # other user owns.
       owner_check = ""
       if sql_table != "api_client_authorizations" and sql_table != "groups" then
         owner_check = "OR #{sql_table}.owner_uuid IN (SELECT target_uuid FROM #{PERMISSION_VIEW} "+
-          "WHERE user_uuid IN (:user_uuids) AND perm_level >= 1 #{trashed_check} AND target_owner_uuid IS NOT NULL) "
+          "WHERE user_uuid IN (#{user_uuids_subquery}) AND perm_level >= 1 #{trashed_check} AND traverse_owned) "
       end
 
       links_cond = ""
@@ -331,7 +361,7 @@ class ArvadosModel < ApplicationRecord
         # users some permission _or_ gives anyone else permission to
         # view one of the authorized users.
         links_cond = "OR (#{sql_table}.link_class IN (:permission_link_classes) AND "+
-                       "(#{sql_table}.head_uuid IN (:user_uuids) OR #{sql_table}.tail_uuid IN (:user_uuids)))"
+                       "(#{sql_table}.head_uuid IN (#{user_uuids_subquery}) OR #{sql_table}.tail_uuid IN (#{user_uuids_subquery})))"
       end
 
       sql_conds = "(#{direct_check} #{owner_check} #{links_cond}) #{exclude_trashed_records}"
index 6e7ab9b07677421ca76dcd6ddb57bc98ff8cb7a8..39f491503ee583033f80ae72ef982261c6ba0af9 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+require 'update_permissions'
+
 class DatabaseSeeds
   extend CurrentApiClient
   def self.install
@@ -12,5 +14,7 @@ class DatabaseSeeds
     anonymous_group_read_permission
     anonymous_user
     empty_collection
+    refresh_permissions
+    refresh_trashed
   end
 end
index 1f2b0d8b776a1f63ca94d0e3e7654e7f9cd5887b..36814a3163e074433dfb3f5beeffb77510740475 100644 (file)
@@ -17,9 +17,15 @@ class Group < ArvadosModel
   attribute :properties, :jsonbHash, default: {}
 
   validate :ensure_filesystem_compatible_name
-  after_create :invalidate_permissions_cache
-  after_update :maybe_invalidate_permissions_cache
   before_create :assign_name
+  after_create :after_ownership_change
+  after_create :update_trash
+
+  before_update :before_ownership_change
+  after_update :after_ownership_change
+
+  after_update :update_trash
+  before_destroy :clear_permissions_and_trash
 
   api_accessible :user, extend: :common do |t|
     t.add :name
@@ -38,18 +44,58 @@ class Group < ArvadosModel
     super if group_class == 'project'
   end
 
-  def maybe_invalidate_permissions_cache
-    if uuid_changed? or owner_uuid_changed? or is_trashed_changed?
-      # This can change users' permissions on other groups as well as
-      # this one.
-      invalidate_permissions_cache
+  def update_trash
+    if trash_at_changed? or owner_uuid_changed?
+      # The group was added to or removed from the trash.
+      #
+      # Strategy:
+      #   Compute project subtree, propagating trash_at to subprojects
+      #   Remove groups that don't belong from trash
+      #   Add/update groups that do belong in the trash
+
+      temptable = "group_subtree_#{rand(2**64).to_s(10)}"
+      ActiveRecord::Base.connection.exec_query %{
+create temporary table #{temptable} on commit drop
+as select * from project_subtree_with_trash_at($1, LEAST($2, $3)::timestamp)
+},
+                                               'Group.update_trash.select',
+                                               [[nil, self.uuid],
+                                                [nil, TrashedGroup.find_by_group_uuid(self.owner_uuid).andand.trash_at],
+                                                [nil, self.trash_at]]
+
+      ActiveRecord::Base.connection.exec_delete %{
+delete from trashed_groups where group_uuid in (select target_uuid from #{temptable} where trash_at is NULL);
+},
+                                            "Group.update_trash.delete"
+
+      ActiveRecord::Base.connection.exec_query %{
+insert into trashed_groups (group_uuid, trash_at)
+  select target_uuid as group_uuid, trash_at from #{temptable} where trash_at is not NULL
+on conflict (group_uuid) do update set trash_at=EXCLUDED.trash_at;
+},
+                                            "Group.update_trash.insert"
+    end
+  end
+
+  def before_ownership_change
+    if owner_uuid_changed? and !self.owner_uuid_was.nil?
+      MaterializedPermission.where(user_uuid: owner_uuid_was, target_uuid: uuid).delete_all
+      update_permissions self.owner_uuid_was, self.uuid, REVOKE_PERM
+    end
+  end
+
+  def after_ownership_change
+    if owner_uuid_changed?
+      update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
     end
   end
 
-  def invalidate_permissions_cache
-    # Ensure a new group can be accessed by the appropriate users
-    # immediately after being created.
-    User.invalidate_permissions_cache self.async_permissions_update
+  def clear_permissions_and_trash
+    MaterializedPermission.where(target_uuid: uuid).delete_all
+    ActiveRecord::Base.connection.exec_delete %{
+delete from trashed_groups where group_uuid=$1
+}, "Group.clear_permissions_and_trash", [[nil, self.uuid]]
+
   end
 
   def assign_name
index ad7800fe679cb91936bde76f00566873cb369419..21d89767c7139a0c8b7ae8eb67595d6ebe336110 100644 (file)
@@ -11,12 +11,13 @@ class Link < ArvadosModel
   # already know how to properly treat them.
   attribute :properties, :jsonbHash, default: {}
 
+  validate :name_links_are_obsolete
   before_create :permission_to_attach_to_objects
   before_update :permission_to_attach_to_objects
-  after_update :maybe_invalidate_permissions_cache
-  after_create :maybe_invalidate_permissions_cache
-  after_destroy :maybe_invalidate_permissions_cache
-  validate :name_links_are_obsolete
+  after_update :call_update_permissions
+  after_create :call_update_permissions
+  before_destroy :clear_permissions
+  after_destroy :check_permissions
 
   api_accessible :user, extend: :common do |t|
     t.add :tail_uuid
@@ -64,15 +65,28 @@ class Link < ArvadosModel
     false
   end
 
-  def maybe_invalidate_permissions_cache
+  PERM_LEVEL = {
+    'can_read' => 1,
+    'can_login' => 1,
+    'can_write' => 2,
+    'can_manage' => 3,
+  }
+
+  def call_update_permissions
+    if self.link_class == 'permission'
+      update_permissions tail_uuid, head_uuid, PERM_LEVEL[name], self.uuid
+    end
+  end
+
+  def clear_permissions
+    if self.link_class == 'permission'
+      update_permissions tail_uuid, head_uuid, REVOKE_PERM, self.uuid
+    end
+  end
+
+  def check_permissions
     if self.link_class == 'permission'
-      # Clearing the entire permissions cache can generate many
-      # unnecessary queries if many active users are not affected by
-      # this change. In such cases it would be better to search cached
-      # permissions for head_uuid and tail_uuid, and invalidate the
-      # cache for only those users. (This would require a browseable
-      # cache.)
-      User.invalidate_permissions_cache
+      check_permissions_against_full_refresh
     end
   end
 
diff --git a/services/api/app/models/materialized_permission.rb b/services/api/app/models/materialized_permission.rb
new file mode 100644 (file)
index 0000000..24ba673
--- /dev/null
@@ -0,0 +1,6 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class MaterializedPermission < ApplicationRecord
+end
diff --git a/services/api/app/models/trashed_group.rb b/services/api/app/models/trashed_group.rb
new file mode 100644 (file)
index 0000000..5c85946
--- /dev/null
@@ -0,0 +1,6 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class TrashedGroup < ApplicationRecord
+end
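
These two bare model classes exist so the new uuid-keyed tables (created with :id => false in the migration below) can be read and cleared through ActiveRecord. A small illustrative sketch, with a placeholder uuid:

    MaterializedPermission.where(target_uuid: 'zzzzz-j7d0g-xxxxxxxxxxxxxxx').delete_all
    TrashedGroup.where('trash_at <= statement_timestamp()').count
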
index c3641b64e84f04217145edacab05ac8d84f259a7..64facaa98e84c2eacfdc6fed38372f2dff22fdde 100644 (file)
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 require 'can_be_an_owner'
-require 'refresh_permission_view'
 
 class User < ArvadosModel
   include HasUuid
@@ -28,25 +27,31 @@ class User < ArvadosModel
     user.username.nil? and user.username_changed?
   }
   before_update :setup_on_activate
+
   before_create :check_auto_admin
   before_create :set_initial_username, :if => Proc.new { |user|
     user.username.nil? and user.email
   }
+  after_create :after_ownership_change
   after_create :setup_on_activate
   after_create :add_system_group_permission_link
-  after_create :invalidate_permissions_cache
   after_create :auto_setup_new_user, :if => Proc.new { |user|
     Rails.configuration.Users.AutoSetupNewUsers and
     (user.uuid != system_user_uuid) and
     (user.uuid != anonymous_user_uuid)
   }
   after_create :send_admin_notifications
+
+  before_update :before_ownership_change
+  after_update :after_ownership_change
   after_update :send_profile_created_notification
   after_update :sync_repository_names, :if => Proc.new { |user|
     (user.uuid != system_user_uuid) and
     user.username_changed? and
     (not user.username_was.nil?)
   }
+  before_destroy :clear_permissions
+  after_destroy :remove_self_from_permissions
 
   has_many :authorized_keys, :foreign_key => :authorized_user_uuid, :primary_key => :uuid
   has_many :repositories, foreign_key: :owner_uuid, primary_key: :uuid
@@ -77,6 +82,12 @@ class User < ArvadosModel
      {read: true, write: true},
      {read: true, write: true, manage: true}]
 
+  VAL_FOR_PERM =
+    {:read => 1,
+     :write => 2,
+     :manage => 3}
+
   def full_name
     "#{first_name} #{last_name}".strip
   end
@@ -88,7 +99,7 @@ class User < ArvadosModel
   end
 
   def groups_i_can(verb)
-    my_groups = self.group_permissions.select { |uuid, mask| mask[verb] }.keys
+    my_groups = self.group_permissions(VAL_FOR_PERM[verb]).keys
     if verb == :read
       my_groups << anonymous_group_uuid
     end
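
Because group_permissions now takes a minimum level and filters in SQL, groups_i_can no longer fetches every readable group and filters in Ruby. A hedged sketch of the level-aware calls (return shapes illustrative only):

    user.group_permissions        # level defaults to 1, i.e. can_read or better
    user.group_permissions(2)     # only groups writable by this user
    user.groups_i_can(:write)     # passes VAL_FOR_PERM[:write] == 2 internally
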
@@ -107,60 +118,68 @@ class User < ArvadosModel
         end
       end
       next if target_uuid == self.uuid
-      next if (group_permissions[target_uuid] and
-               group_permissions[target_uuid][action])
-      if target.respond_to? :owner_uuid
-        next if target.owner_uuid == self.uuid
-        next if (group_permissions[target.owner_uuid] and
-                 group_permissions[target.owner_uuid][action])
-      end
-      sufficient_perms = case action
-                         when :manage
-                           ['can_manage']
-                         when :write
-                           ['can_manage', 'can_write']
-                         when :read
-                           ['can_manage', 'can_write', 'can_read']
-                         else
-                           # (Skip this kind of permission opportunity
-                           # if action is an unknown permission type)
-                         end
-      if sufficient_perms
-        # Check permission links with head_uuid pointing directly at
-        # the target object. If target is a Group, this is redundant
-        # and will fail except [a] if permission caching is broken or
-        # [b] during a race condition, where a permission link has
-        # *just* been added.
-        if Link.where(link_class: 'permission',
-                      name: sufficient_perms,
-                      tail_uuid: groups_i_can(action) + [self.uuid],
-                      head_uuid: target_uuid).any?
-          next
-        end
+
+      target_owner_uuid = target.owner_uuid if target.respond_to? :owner_uuid
+
+      user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$3"}
+
+      unless ActiveRecord::Base.connection.
+        exec_query(%{
+SELECT 1 FROM #{PERMISSION_VIEW}
+  WHERE user_uuid in (#{user_uuids_subquery}) and
+        ((target_uuid = $2 and perm_level >= $3)
+         or (target_uuid = $4 and perm_level >= $3 and traverse_owned))
+},
+                  # "name" arg is a query label that appears in logs:
+                   "user_can_query",
+                   [[nil, self.uuid],
+                    [nil, target_uuid],
+                    [nil, VAL_FOR_PERM[action]],
+                    [nil, target_owner_uuid]]
+                  ).any?
+        return false
       end
-      return false
     end
     true
   end
 
-  def self.invalidate_permissions_cache(async=false)
-    refresh_permission_view(async)
+  def before_ownership_change
+    if owner_uuid_changed? and !self.owner_uuid_was.nil?
+      MaterializedPermission.where(user_uuid: owner_uuid_was, target_uuid: uuid).delete_all
+      update_permissions self.owner_uuid_was, self.uuid, REVOKE_PERM
+    end
+  end
+
+  def after_ownership_change
+    if owner_uuid_changed?
+      update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
+    end
+  end
+
+  def clear_permissions
+    MaterializedPermission.where("user_uuid = ? and target_uuid != ?", uuid, uuid).delete_all
   end
 
-  def invalidate_permissions_cache
-    User.invalidate_permissions_cache
+  def remove_self_from_permissions
+    MaterializedPermission.where("target_uuid = ?", uuid).delete_all
+    check_permissions_against_full_refresh
   end
 
   # Return a hash of {user_uuid: group_perms}
+  #
+  # note: this does not account for permissions that a user gains by
+  # having can_manage on another user.
   def self.all_group_permissions
     all_perms = {}
     ActiveRecord::Base.connection.
-      exec_query("SELECT user_uuid, target_owner_uuid, perm_level, trashed
+      exec_query(%{
+SELECT user_uuid, target_uuid, perm_level
                   FROM #{PERMISSION_VIEW}
-                  WHERE target_owner_uuid IS NOT NULL",
+                  WHERE traverse_owned
+},
                   # "name" arg is a query label that appears in logs:
-                  "all_group_permissions",
-                  ).rows.each do |user_uuid, group_uuid, max_p_val, trashed|
+                 "all_group_permissions").
+      rows.each do |user_uuid, group_uuid, max_p_val|
       all_perms[user_uuid] ||= {}
       all_perms[user_uuid][group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
     end
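
For reference, a hedged sketch of the shape this returns (uuids invented; the inner hashes come from PERMS_FOR_VAL):

    User.all_group_permissions
    # => {"zzzzz-tpzed-user000000000aa" =>
    #       {"zzzzz-j7d0g-group00000000aa" => {read: true, write: true}}}
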
@@ -170,18 +189,23 @@ class User < ArvadosModel
   # Return a hash of {group_uuid: perm_hash} where perm_hash[:read]
   # and perm_hash[:write] are true if this user can read and write
   # objects owned by group_uuid.
-  def group_permissions
-    group_perms = {self.uuid => {:read => true, :write => true, :manage => true}}
+  def group_permissions(level=1)
+    group_perms = {}
+
+    user_uuids_subquery = USER_UUIDS_SUBQUERY_TEMPLATE % {user: "$1", perm_level: "$2"}
+
     ActiveRecord::Base.connection.
-      exec_query("SELECT target_owner_uuid, perm_level, trashed
-                  FROM #{PERMISSION_VIEW}
-                  WHERE user_uuid = $1
-                  AND target_owner_uuid IS NOT NULL",
+      exec_query(%{
+SELECT target_uuid, perm_level
+  FROM #{PERMISSION_VIEW}
+  WHERE user_uuid in (#{user_uuids_subquery}) and perm_level >= $2
+},
                   # "name" arg is a query label that appears in logs:
-                  "group_permissions for #{uuid}",
+                  "User.group_permissions",
                   # "binds" arg is an array of [col_id, value] for '$1' vars:
-                  [[nil, uuid]],
-                ).rows.each do |group_uuid, max_p_val, trashed|
+                  [[nil, uuid],
+                   [nil, level]]).
+      rows.each do |group_uuid, max_p_val|
       group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
     end
     group_perms
@@ -309,6 +333,18 @@ class User < ArvadosModel
       self.uuid = new_uuid
       save!(validate: false)
       change_all_uuid_refs(old_uuid: old_uuid, new_uuid: new_uuid)
+      ActiveRecord::Base.connection.exec_update %{
+update #{PERMISSION_VIEW} set user_uuid=$1 where user_uuid = $2
+},
+                                             'User.update_uuid.update_permissions_user_uuid',
+                                             [[nil, new_uuid],
+                                              [nil, old_uuid]]
+      ActiveRecord::Base.connection.exec_update %{
+update #{PERMISSION_VIEW} set target_uuid=$1 where target_uuid = $2
+},
+                                             'User.update_uuid.update_permissions_target_uuid',
+                                             [[nil, new_uuid],
+                                              [nil, old_uuid]]
     end
   end
 
@@ -334,6 +370,9 @@ class User < ArvadosModel
       raise "user does not exist" if !new_user
       raise "cannot merge to an already merged user" if new_user.redirect_to_user_uuid
 
+      self.clear_permissions
+      new_user.clear_permissions
+
       # If 'self' is a remote user, don't transfer authorizations
       # (i.e. ability to access the account) to the new user, because
       # that gives the remote site the ability to access the 'new'
@@ -408,7 +447,12 @@ class User < ArvadosModel
       if redirect_to_new_user
         update_attributes!(redirect_to_user_uuid: new_user.uuid, username: nil)
       end
-      invalidate_permissions_cache
+      skip_check_permissions_against_full_refresh do
+        update_permissions self.uuid, self.uuid, CAN_MANAGE_PERM
+        update_permissions new_user.uuid, new_user.uuid, CAN_MANAGE_PERM
+        update_permissions new_user.owner_uuid, new_user.uuid, CAN_MANAGE_PERM
+      end
+      update_permissions self.owner_uuid, self.uuid, CAN_MANAGE_PERM
     end
   end
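
Taken together, the ownership callbacks keep materialized_permissions current when a user is re-homed, with no global recompute. A hedged sketch of the sequence (objects assumed):

    user.owner_uuid = new_project.uuid
    user.save!
    # before_update -> before_ownership_change: drop rows granted through the
    #   old owner (update_permissions old_owner_uuid, user.uuid, REVOKE_PERM)
    # after_update  -> after_ownership_change: re-grant through the new owner
    #   (update_permissions new_project.uuid, user.uuid, CAN_MANAGE_PERM)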
 
diff --git a/services/api/db/migrate/20200501150153_permission_table.rb b/services/api/db/migrate/20200501150153_permission_table.rb
new file mode 100644 (file)
index 0000000..4f9ea15
--- /dev/null
@@ -0,0 +1,362 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+class PermissionTable < ActiveRecord::Migration[5.0]
+  def up
+    # This is a major migration.  We are replacing the
+    # materialized_permission_view, which is fully recomputed any time
+    # a permission changes (and becomes very expensive as the number
+    # of users/groups becomes large), with a new strategy that only
+    # recomputes permissions for the subset of objects that are
+    # potentially affected by the addition or removal of a permission
+    # relationship (i.e. ownership or a permission link).
+    #
+    # This also disentangles the concept of "trashed groups" from the
+    # permissions system.  Updating trashed items follows a similar
+    # (but less complicated) strategy to updating permissions, so it
+    # may be helpful to look at that first.
+
+    ActiveRecord::Base.connection.execute "DROP MATERIALIZED VIEW IF EXISTS materialized_permission_view;"
+    drop_table :permission_refresh_lock
+
+    # This table stores the set of trashed groups and their trash_at
+    # time.  Used to exclude trashed projects and their contents when
+    # getting object listings.
+    create_table :trashed_groups, :id => false do |t|
+      t.string :group_uuid
+      t.datetime :trash_at
+    end
+    add_index :trashed_groups, :group_uuid, :unique => true
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function project_subtree_with_trash_at (starting_uuid varchar(27), starting_trash_at timestamp)
+returns table (target_uuid varchar(27), trash_at timestamp)
+STABLE
+language SQL
+as $$
+/* Starting from a project, recursively traverse all the projects
+  underneath it and return a set of project uuids and trash_at times
+  (may be null).  The initial trash_at can be a timestamp or null.
+  The trash_at time propagates downward to groups it owns, i.e. when a
+  group is trashed, everything underneath it in the ownership
+  hierarchy is also considered trashed.  However, this fact is
+  recorded in the trashed_groups table, not by updating the
+  trash_at field in the groups table.
+*/
+WITH RECURSIVE
+        project_subtree(uuid, trash_at) as (
+        values (starting_uuid, starting_trash_at)
+        union
+        select groups.uuid, LEAST(project_subtree.trash_at, groups.trash_at)
+          from groups join project_subtree on (groups.owner_uuid = project_subtree.uuid)
+        )
+        select uuid, trash_at from project_subtree;
+$$;
+}
+
+    # Now populate the table.  For a non-test database this is the only
+    # time this ever happens; after this, the trash table is updated
+    # incrementally.  See app/models/group.rb#update_trash
+    refresh_trashed
+
+    # The table to store the flattened permissions.  This is almost
+    # exactly the same as the old materialized_permission_view except
+    # that the target_owner_uuid column in the view is now just a
+    # boolean traverse_owned (the column was only ever tested for null
+    # or non-null).
+    #
+    # For details on how this table is used to apply permissions to
+    # queries, see app/models/arvados_model.rb#readable_by
+    #
+    create_table :materialized_permissions, :id => false do |t|
+      t.string :user_uuid
+      t.string :target_uuid
+      t.integer :perm_level
+      t.boolean :traverse_owned
+    end
+    add_index :materialized_permissions, [:user_uuid, :target_uuid], unique: true, name: 'permission_user_target'
+    add_index :materialized_permissions, [:target_uuid], unique: false, name: 'permission_target'
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function should_traverse_owned (starting_uuid varchar(27),
+                                                  starting_perm integer)
+  returns bool
+IMMUTABLE
+language SQL
+as $$
+/* Helper function.  Determines if permission on an object implies
+   transitive permission to things the object owns.  This is always
+   true for groups, but only true for users when the permission level
+   is can_manage.
+*/
+select starting_uuid like '_____-j7d0g-_______________' or
+       (starting_uuid like '_____-tpzed-_______________' and starting_perm >= 3);
+$$;
+}
+
+    # Merge all permission relationships into a single view.  This
+    # consists of: groups owned by users and projects, users owned
+    # by other users, users have permission on themselves,
+    # and explicit permission links.
+    #
+    # A SQL view gets inlined into the query where it is used as a
+    # subquery.  This enables the query planner to inject constraints,
+    # so it only has to look up edges it plans to traverse and avoid a brute
+    # force query of all edges.
+    ActiveRecord::Base.connection.execute %{
+create view permission_graph_edges as
+  select groups.owner_uuid as tail_uuid, groups.uuid as head_uuid,
+         (3) as val, groups.uuid as edge_id from groups
+union all
+  select users.owner_uuid as tail_uuid, users.uuid as head_uuid,
+         (3) as val, users.uuid as edge_id from users
+union all
+  select users.uuid as tail_uuid, users.uuid as head_uuid,
+         (3) as val, '' as edge_id from users
+union all
+  select links.tail_uuid,
+         links.head_uuid,
+         CASE
+           WHEN links.name = 'can_read'   THEN 1
+           WHEN links.name = 'can_login'  THEN 1
+           WHEN links.name = 'can_write'  THEN 2
+           WHEN links.name = 'can_manage' THEN 3
+           ELSE 0
+         END as val,
+         links.uuid as edge_id
+      from links
+      where links.link_class='permission'
+}
+
+    # This is used to ensure that the permission edge passed into
+    # compute_permission_subgraph replaces the existing edge in
+    # the "edges" view that is about to be removed.
+    edge_perm = %{
+case (edges.edge_id = perm_edge_id)
+                               when true then starting_perm
+                               else edges.val
+                            end
+}
+
+    # The primary function to compute permissions for a subgraph.
+    # Comments on how it works are inline.
+    #
+    # Due to performance issues caused by the query optimizer not
+    # working across function and "with" expression boundaries, I
+    # had to fall back on string templates in order to inline
+    # repeated code.
+
+    ActiveRecord::Base.connection.execute %{
+create or replace function compute_permission_subgraph (perm_origin_uuid varchar(27),
+                                                        starting_uuid varchar(27),
+                                                        starting_perm integer,
+                                                        perm_edge_id varchar(27))
+returns table (user_uuid varchar(27), target_uuid varchar(27), val integer, traverse_owned bool)
+STABLE
+language SQL
+as $$
+
+/* The purpose of this function is to compute the permissions for a
+   subgraph of the database, starting from a given edge.  The newly
+   computed permissions are used to add and remove rows from the main
+   permissions table.
+
+   perm_origin_uuid: The object that 'gets' the permission.
+
+   starting_uuid: The starting object the permission applies to.
+
+   starting_perm: The permission that perm_origin_uuid 'has' on
+                  starting_uuid.  One of 1, 2, 3 for can_read,
+                  can_write, can_manage respectively, or 0 to revoke
+                  permissions.
+
+   perm_edge_id: Identifies the permission edge that is being updated.
+                 For changes of ownership, this is starting_uuid.
+                 For links, this is the uuid of the link object.
+                 This is used to override the edge value in the database
+                 with starting_perm.  This is necessary when revoking
+                 permissions because the update happens before the edge is
+                 actually removed.
+*/
+with
+  /* Starting from starting_uuid, determine the set of objects that
+     could be affected by this permission change.
+
+     Note: We don't traverse users unless it is an "identity"
+     permission (permission origin is self).
+  */
+  perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+             values (perm_origin_uuid, starting_uuid, starting_perm,
+                    should_traverse_owned(starting_uuid, starting_perm),
+                    (perm_origin_uuid = starting_uuid or starting_uuid not like '_____-tpzed-_______________'))
+},
+:edge_perm => edge_perm
+} }),
+
+  /* Find other inbound edges that grant permissions to 'targets' in
+     perm_from_start, and compute permissions that originate from
+     those.
+
+     This is necessary for two reasons:
+
+       1) Other users may have access to a subset of the objects
+       through other permission links than the one we started from.
+       If we don't recompute them, their permission will get dropped.
+
+       2) There may be more than one path through which a user gets
+       permission to an object.  For example, a user owns a project
+       and also shares it can_read with a group the user belongs
+       to.  Adding the can_read link must not overwrite the existing
+       can_manage permission granted by ownership.
+  */
+  additional_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+    select edges.tail_uuid as origin_uuid, edges.head_uuid as target_uuid, edges.val,
+           should_traverse_owned(edges.head_uuid, edges.val),
+           edges.head_uuid like '_____-j7d0g-_______________'
+      from permission_graph_edges as edges
+      where edges.edge_id != perm_edge_id and
+            edges.tail_uuid not in (select target_uuid from perm_from_start where target_uuid like '_____-j7d0g-_______________') and
+            edges.head_uuid in (select target_uuid from perm_from_start)
+},
+:edge_perm => edge_perm
+} }),
+
+  /* Combine the permissions computed in the first two phases. */
+  all_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+      select * from perm_from_start
+    union all
+      select * from additional_perms
+  )
+
+  /* The actual query that produces rows to be added or removed
+     from the materialized_permissions table.  This is the clever
+     bit.
+
+     Key insights:
+
+     * For every group, the materialized_permissions table lists all
+       users that have access to that group.
+
+     * The all_perms subquery has computed permissions on a set of
+       objects for all inbound "origins", which are users or groups.
+
+     * Permissions through groups are transitive.
+
+     We can infer:
+
+     1) The materialized_permissions table declares that user X has permission N on group Y
+     2) The all_perms result has determined group Y has permission M on object Z
+     3) Therefore, user X has permission min(N, M) on object Z
+
+     This allows us to efficiently determine the set of users that
+     have permissions on the subset of objects, without having to
+     follow the chain of permission back up to find those users.
+
+     In addition, because users always have permission on themselves, this
+     query also makes sure those permission rows are always
+     returned.
+  */
+  select v.user_uuid, v.target_uuid, max(v.perm_level), bool_or(v.traverse_owned) from
+    (select m.user_uuid,
+         u.target_uuid,
+         least(u.val, m.perm_level) as perm_level,
+         u.traverse_owned
+      from all_perms as u, materialized_permissions as m
+           where u.perm_origin_uuid = m.target_uuid AND m.traverse_owned
+           AND (m.user_uuid = m.target_uuid or m.target_uuid not like '_____-tpzed-_______________')
+    union all
+      select target_uuid as user_uuid, target_uuid, 3, true
+        from all_perms
+        where all_perms.target_uuid like '_____-tpzed-_______________') as v
+    group by v.user_uuid, v.target_uuid
+$$;
+}
+
+    #
+    # Populate materialized_permissions by traversing permissions
+    # starting at each user.
+    #
+    refresh_permissions
+  end
+
+  def down
+    drop_table :materialized_permissions
+    drop_table :trashed_groups
+
+    ActiveRecord::Base.connection.execute "DROP function project_subtree_with_trash_at (varchar, timestamp);"
+    ActiveRecord::Base.connection.execute "DROP function compute_permission_subgraph (varchar, varchar, integer, varchar);"
+    ActiveRecord::Base.connection.execute "DROP function should_traverse_owned(varchar, integer);"
+    ActiveRecord::Base.connection.execute "DROP view permission_graph_edges;"
+
+    ActiveRecord::Base.connection.execute(%{
+CREATE MATERIALIZED VIEW materialized_permission_view AS
+ WITH RECURSIVE perm_value(name, val) AS (
+         VALUES ('can_read'::text,(1)::smallint), ('can_login'::text,1), ('can_write'::text,2), ('can_manage'::text,3)
+        ), perm_edges(tail_uuid, head_uuid, val, follow, trashed) AS (
+         SELECT links.tail_uuid,
+            links.head_uuid,
+            pv.val,
+            ((pv.val = 3) OR (groups.uuid IS NOT NULL)) AS follow,
+            (0)::smallint AS trashed,
+            (0)::smallint AS followtrash
+           FROM ((public.links
+             LEFT JOIN perm_value pv ON ((pv.name = (links.name)::text)))
+             LEFT JOIN public.groups ON (((pv.val < 3) AND ((groups.uuid)::text = (links.head_uuid)::text))))
+          WHERE ((links.link_class)::text = 'permission'::text)
+        UNION ALL
+         SELECT groups.owner_uuid,
+            groups.uuid,
+            3,
+            true AS bool,
+                CASE
+                    WHEN ((groups.trash_at IS NOT NULL) AND (groups.trash_at < clock_timestamp())) THEN 1
+                    ELSE 0
+                END AS "case",
+            1
+           FROM public.groups
+        ), perm(val, follow, user_uuid, target_uuid, trashed) AS (
+         SELECT (3)::smallint AS val,
+            true AS follow,
+            (users.uuid)::character varying(32) AS user_uuid,
+            (users.uuid)::character varying(32) AS target_uuid,
+            (0)::smallint AS trashed
+           FROM public.users
+        UNION
+         SELECT (LEAST((perm_1.val)::integer, edges.val))::smallint AS val,
+            edges.follow,
+            perm_1.user_uuid,
+            (edges.head_uuid)::character varying(32) AS target_uuid,
+            ((GREATEST((perm_1.trashed)::integer, edges.trashed) * edges.followtrash))::smallint AS trashed
+           FROM (perm perm_1
+             JOIN perm_edges edges ON ((perm_1.follow AND ((edges.tail_uuid)::text = (perm_1.target_uuid)::text))))
+        )
+ SELECT perm.user_uuid,
+    perm.target_uuid,
+    max(perm.val) AS perm_level,
+        CASE perm.follow
+            WHEN true THEN perm.target_uuid
+            ELSE NULL::character varying
+        END AS target_owner_uuid,
+    max(perm.trashed) AS trashed
+   FROM perm
+  GROUP BY perm.user_uuid, perm.target_uuid,
+        CASE perm.follow
+            WHEN true THEN perm.target_uuid
+            ELSE NULL::character varying
+        END
+  WITH NO DATA;
+}
+    )
+
+    add_index :materialized_permission_view, [:trashed, :target_uuid], name: 'permission_target_trashed'
+    add_index :materialized_permission_view, [:user_uuid, :trashed, :perm_level], name: 'permission_target_user_trashed_level'
+    create_table :permission_refresh_lock
+
+    ActiveRecord::Base.connection.execute 'REFRESH MATERIALIZED VIEW materialized_permission_view;'
+  end
+end
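
To see what the incremental recompute would produce for a given edge, compute_permission_subgraph can be probed directly. A hedged sketch (all uuids are placeholders; 1 = can_read):

    rows = ActiveRecord::Base.connection.exec_query(
      "select * from compute_permission_subgraph($1, $2, $3, $4)",
      "inspect_subgraph",
      [[nil, "zzzzz-tpzed-user000000000aa"],   # perm_origin_uuid (who gains)
       [nil, "zzzzz-j7d0g-group00000000aa"],   # starting_uuid (what it applies to)
       [nil, 1],                               # starting_perm (can_read)
       [nil, "zzzzz-o0j2j-link0000000000a"]])  # perm_edge_id (the link's uuid)
    rows.each { |r| puts r.inspect }  # user_uuid, target_uuid, val, traverse_owned
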
index 88cd0baa2f7bab44be54080e9d9b6a732d210d16..469475ad9604232fbeafc76889c4665b6cd77b80 100644 (file)
@@ -38,6 +38,216 @@ CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
 -- COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams';
 
 
+--
+-- Name: compute_permission_subgraph(character varying, character varying, integer, character varying); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.compute_permission_subgraph(perm_origin_uuid character varying, starting_uuid character varying, starting_perm integer, perm_edge_id character varying) RETURNS TABLE(user_uuid character varying, target_uuid character varying, val integer, traverse_owned boolean)
+    LANGUAGE sql STABLE
+    AS $$
+
+/* The purpose of this function is to compute the permissions for a
+   subgraph of the database, starting from a given edge.  The newly
+   computed permissions are used to add and remove rows from the main
+   permissions table.
+
+   perm_origin_uuid: The object that 'gets' the permission.
+
+   starting_uuid: The starting object the permission applies to.
+
+   starting_perm: The permission that perm_origin_uuid 'has' on
+                  starting_uuid.  One of 1, 2, 3 for can_read,
+                  can_write, can_manage respectively, or 0 to revoke
+                  permissions.
+
+   perm_edge_id: Identifies the permission edge that is being updated.
+                 For changes of ownership, this is starting_uuid.
+                 For links, this is the uuid of the link object.
+                 This is used to override the edge value in the database
+                 with starting_perm.  This is necessary when revoking
+                 permissions because the update happens before the edge is
+                 actually removed.
+*/
+with
+  /* Starting from starting_uuid, determine the set of objects that
+     could be affected by this permission change.
+
+     Note: We don't traverse users unless it is an "identity"
+     permission (permission origin is self).
+  */
+  perm_from_start(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    
+WITH RECURSIVE
+        traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
+            
+             values (perm_origin_uuid, starting_uuid, starting_perm,
+                    should_traverse_owned(starting_uuid, starting_perm),
+                    (perm_origin_uuid = starting_uuid or starting_uuid not like '_____-tpzed-_______________'))
+
+          union
+            (select traverse_graph.origin_uuid,
+                    edges.head_uuid,
+                      least(
+case (edges.edge_id = perm_edge_id)
+                               when true then starting_perm
+                               else edges.val
+                            end
+,
+                            traverse_graph.val),
+                    should_traverse_owned(edges.head_uuid, edges.val),
+                    false
+             from permission_graph_edges as edges, traverse_graph
+             where traverse_graph.target_uuid = edges.tail_uuid
+             and (edges.tail_uuid like '_____-j7d0g-_______________' or
+                  traverse_graph.starting_set)))
+        select traverse_graph.origin_uuid, target_uuid, max(val) as val, bool_or(traverse_owned) as traverse_owned from traverse_graph
+        group by (traverse_graph.origin_uuid, target_uuid)
+),
+
+  /* Find other inbound edges that grant permissions to 'targets' in
+     perm_from_start, and compute permissions that originate from
+     those.
+
+     This is necessary for two reasons:
+
+       1) Other users may have access to a subset of the objects
+       through other permission links than the one we started from.
+       If we don't recompute them, their permission will get dropped.
+
+       2) There may be more than one path through which a user gets
+       permission to an object.  For example, a user owns a project
+       and also shares it can_read with a group the user belongs
+       to.  Adding the can_read link must not overwrite the existing
+       can_manage permission granted by ownership.
+  */
+  additional_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+    
+WITH RECURSIVE
+        traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
+            
+    select edges.tail_uuid as origin_uuid, edges.head_uuid as target_uuid, edges.val,
+           should_traverse_owned(edges.head_uuid, edges.val),
+           edges.head_uuid like '_____-j7d0g-_______________'
+      from permission_graph_edges as edges
+      where edges.edge_id != perm_edge_id and
+            edges.tail_uuid not in (select target_uuid from perm_from_start where target_uuid like '_____-j7d0g-_______________') and
+            edges.head_uuid in (select target_uuid from perm_from_start)
+
+          union
+            (select traverse_graph.origin_uuid,
+                    edges.head_uuid,
+                      least(
+case (edges.edge_id = perm_edge_id)
+                               when true then starting_perm
+                               else edges.val
+                            end
+,
+                            traverse_graph.val),
+                    should_traverse_owned(edges.head_uuid, edges.val),
+                    false
+             from permission_graph_edges as edges, traverse_graph
+             where traverse_graph.target_uuid = edges.tail_uuid
+             and (edges.tail_uuid like '_____-j7d0g-_______________' or
+                  traverse_graph.starting_set)))
+        select traverse_graph.origin_uuid, target_uuid, max(val) as val, bool_or(traverse_owned) as traverse_owned from traverse_graph
+        group by (traverse_graph.origin_uuid, target_uuid)
+),
+
+  /* Combine the permissions computed in the first two phases. */
+  all_perms(perm_origin_uuid, target_uuid, val, traverse_owned) as (
+      select * from perm_from_start
+    union all
+      select * from additional_perms
+  )
+
+  /* The actual query that produces rows to be added or removed
+     from the materialized_permissions table.  This is the clever
+     bit.
+
+     Key insights:
+
+     * For every group, the materialized_permissions table lists all
+       users that have access to that group.
+
+     * The all_perms subquery has computed permissions on a set of
+       objects for all inbound "origins", which are users or groups.
+
+     * Permissions through groups are transitive.
+
+     We can infer:
+
+     1) The materialized_permissions table declares that user X has permission N on group Y
+     2) The all_perms result has determined group Y has permission M on object Z
+     3) Therefore, user X has permission min(N, M) on object Z
+
+     This allows us to efficiently determine the set of users that
+     have permissions on the subset of objects, without having to
+     follow the chain of permission back up to find those users.
+
+     In addition, because users always have permission on themselves, this
+     query also makes sure those permission rows are always
+     returned.
+  */
+  select v.user_uuid, v.target_uuid, max(v.perm_level), bool_or(v.traverse_owned) from
+    (select m.user_uuid,
+         u.target_uuid,
+         least(u.val, m.perm_level) as perm_level,
+         u.traverse_owned
+      from all_perms as u, materialized_permissions as m
+           where u.perm_origin_uuid = m.target_uuid AND m.traverse_owned
+           AND (m.user_uuid = m.target_uuid or m.target_uuid not like '_____-tpzed-_______________')
+    union all
+      select target_uuid as user_uuid, target_uuid, 3, true
+        from all_perms
+        where all_perms.target_uuid like '_____-tpzed-_______________') as v
+    group by v.user_uuid, v.target_uuid
+$$;
+
+
+--
+-- Name: project_subtree_with_trash_at(character varying, timestamp without time zone); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.project_subtree_with_trash_at(starting_uuid character varying, starting_trash_at timestamp without time zone) RETURNS TABLE(target_uuid character varying, trash_at timestamp without time zone)
+    LANGUAGE sql STABLE
+    AS $$
+/* Starting from a project, recursively traverse all the projects
+  underneath it and return a set of project uuids and trash_at times
+  (may be null).  The initial trash_at can be a timestamp or null.
+  The trash_at time propagates downward to groups it owns, i.e. when a
+  group is trashed, everything underneath it in the ownership
+  hierarchy is also considered trashed.  However, this fact is
+  recorded in the trashed_groups table, not by updating the
+  trash_at field in the groups table.
+*/
+WITH RECURSIVE
+        project_subtree(uuid, trash_at) as (
+        values (starting_uuid, starting_trash_at)
+        union
+        select groups.uuid, LEAST(project_subtree.trash_at, groups.trash_at)
+          from groups join project_subtree on (groups.owner_uuid = project_subtree.uuid)
+        )
+        select uuid, trash_at from project_subtree;
+$$;
+
+
+--
+-- Name: should_traverse_owned(character varying, integer); Type: FUNCTION; Schema: public; Owner: -
+--
+
+CREATE FUNCTION public.should_traverse_owned(starting_uuid character varying, starting_perm integer) RETURNS boolean
+    LANGUAGE sql IMMUTABLE
+    AS $$
+/* Helper function.  Determines if permission on an object implies
+   transitive permission to things the object owns.  This is always
+   true for groups, but only true for users when the permission level
+   is can_manage.
+*/
+select starting_uuid like '_____-j7d0g-_______________' or
+       (starting_uuid like '_____-tpzed-_______________' and starting_perm >= 3);
+$$;
+
+
 SET default_tablespace = '';
 
 SET default_with_oids = false;
@@ -719,93 +929,17 @@ ALTER SEQUENCE public.logs_id_seq OWNED BY public.logs.id;
 
 
 --
--- Name: users; Type: TABLE; Schema: public; Owner: -
+-- Name: materialized_permissions; Type: TABLE; Schema: public; Owner: -
 --
 
-CREATE TABLE public.users (
-    id integer NOT NULL,
-    uuid character varying(255),
-    owner_uuid character varying(255) NOT NULL,
-    created_at timestamp without time zone NOT NULL,
-    modified_by_client_uuid character varying(255),
-    modified_by_user_uuid character varying(255),
-    modified_at timestamp without time zone,
-    email character varying(255),
-    first_name character varying(255),
-    last_name character varying(255),
-    identity_url character varying(255),
-    is_admin boolean,
-    prefs text,
-    updated_at timestamp without time zone NOT NULL,
-    default_owner_uuid character varying(255),
-    is_active boolean DEFAULT false,
-    username character varying(255),
-    redirect_to_user_uuid character varying
+CREATE TABLE public.materialized_permissions (
+    user_uuid character varying,
+    target_uuid character varying,
+    perm_level integer,
+    traverse_owned boolean
 );
 
 
---
--- Name: materialized_permission_view; Type: MATERIALIZED VIEW; Schema: public; Owner: -
---
-
-CREATE MATERIALIZED VIEW public.materialized_permission_view AS
- WITH RECURSIVE perm_value(name, val) AS (
-         VALUES ('can_read'::text,(1)::smallint), ('can_login'::text,1), ('can_write'::text,2), ('can_manage'::text,3)
-        ), perm_edges(tail_uuid, head_uuid, val, follow, trashed) AS (
-         SELECT links.tail_uuid,
-            links.head_uuid,
-            pv.val,
-            ((pv.val = 3) OR (groups.uuid IS NOT NULL)) AS follow,
-            (0)::smallint AS trashed,
-            (0)::smallint AS followtrash
-           FROM ((public.links
-             LEFT JOIN perm_value pv ON ((pv.name = (links.name)::text)))
-             LEFT JOIN public.groups ON (((pv.val < 3) AND ((groups.uuid)::text = (links.head_uuid)::text))))
-          WHERE ((links.link_class)::text = 'permission'::text)
-        UNION ALL
-         SELECT groups.owner_uuid,
-            groups.uuid,
-            3,
-            true AS bool,
-                CASE
-                    WHEN ((groups.trash_at IS NOT NULL) AND (groups.trash_at < clock_timestamp())) THEN 1
-                    ELSE 0
-                END AS "case",
-            1
-           FROM public.groups
-        ), perm(val, follow, user_uuid, target_uuid, trashed) AS (
-         SELECT (3)::smallint AS val,
-            true AS follow,
-            (users.uuid)::character varying(32) AS user_uuid,
-            (users.uuid)::character varying(32) AS target_uuid,
-            (0)::smallint AS trashed
-           FROM public.users
-        UNION
-         SELECT (LEAST((perm_1.val)::integer, edges.val))::smallint AS val,
-            edges.follow,
-            perm_1.user_uuid,
-            (edges.head_uuid)::character varying(32) AS target_uuid,
-            ((GREATEST((perm_1.trashed)::integer, edges.trashed) * edges.followtrash))::smallint AS trashed
-           FROM (perm perm_1
-             JOIN perm_edges edges ON ((perm_1.follow AND ((edges.tail_uuid)::text = (perm_1.target_uuid)::text))))
-        )
- SELECT perm.user_uuid,
-    perm.target_uuid,
-    max(perm.val) AS perm_level,
-        CASE perm.follow
-            WHEN true THEN perm.target_uuid
-            ELSE NULL::character varying
-        END AS target_owner_uuid,
-    max(perm.trashed) AS trashed
-   FROM perm
-  GROUP BY perm.user_uuid, perm.target_uuid,
-        CASE perm.follow
-            WHEN true THEN perm.target_uuid
-            ELSE NULL::character varying
-        END
-  WITH NO DATA;
-
-
 --
 -- Name: nodes; Type: TABLE; Schema: public; Owner: -
 --
@@ -851,31 +985,66 @@ ALTER SEQUENCE public.nodes_id_seq OWNED BY public.nodes.id;
 
 
 --
--- Name: permission_refresh_lock; Type: TABLE; Schema: public; Owner: -
+-- Name: users; Type: TABLE; Schema: public; Owner: -
 --
 
-CREATE TABLE public.permission_refresh_lock (
-    id integer NOT NULL
+CREATE TABLE public.users (
+    id integer NOT NULL,
+    uuid character varying(255),
+    owner_uuid character varying(255) NOT NULL,
+    created_at timestamp without time zone NOT NULL,
+    modified_by_client_uuid character varying(255),
+    modified_by_user_uuid character varying(255),
+    modified_at timestamp without time zone,
+    email character varying(255),
+    first_name character varying(255),
+    last_name character varying(255),
+    identity_url character varying(255),
+    is_admin boolean,
+    prefs text,
+    updated_at timestamp without time zone NOT NULL,
+    default_owner_uuid character varying(255),
+    is_active boolean DEFAULT false,
+    username character varying(255),
+    redirect_to_user_uuid character varying
 );
 
 
 --
--- Name: permission_refresh_lock_id_seq; Type: SEQUENCE; Schema: public; Owner: -
---
-
-CREATE SEQUENCE public.permission_refresh_lock_id_seq
-    START WITH 1
-    INCREMENT BY 1
-    NO MINVALUE
-    NO MAXVALUE
-    CACHE 1;
-
-
---
--- Name: permission_refresh_lock_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
---
-
-ALTER SEQUENCE public.permission_refresh_lock_id_seq OWNED BY public.permission_refresh_lock.id;
+-- Name: permission_graph_edges; Type: VIEW; Schema: public; Owner: -
+--
+
+CREATE VIEW public.permission_graph_edges AS
+ SELECT groups.owner_uuid AS tail_uuid,
+    groups.uuid AS head_uuid,
+    3 AS val,
+    groups.uuid AS edge_id
+   FROM public.groups
+UNION ALL
+ SELECT users.owner_uuid AS tail_uuid,
+    users.uuid AS head_uuid,
+    3 AS val,
+    users.uuid AS edge_id
+   FROM public.users
+UNION ALL
+ SELECT users.uuid AS tail_uuid,
+    users.uuid AS head_uuid,
+    3 AS val,
+    ''::character varying AS edge_id
+   FROM public.users
+UNION ALL
+ SELECT links.tail_uuid,
+    links.head_uuid,
+        CASE
+            WHEN ((links.name)::text = 'can_read'::text) THEN 1
+            WHEN ((links.name)::text = 'can_login'::text) THEN 1
+            WHEN ((links.name)::text = 'can_write'::text) THEN 2
+            WHEN ((links.name)::text = 'can_manage'::text) THEN 3
+            ELSE 0
+        END AS val,
+    links.uuid AS edge_id
+   FROM public.links
+  WHERE ((links.link_class)::text = 'permission'::text);
 
 
 --
@@ -1079,6 +1248,16 @@ CREATE SEQUENCE public.traits_id_seq
 ALTER SEQUENCE public.traits_id_seq OWNED BY public.traits.id;
 
 
+--
+-- Name: trashed_groups; Type: TABLE; Schema: public; Owner: -
+--
+
+CREATE TABLE public.trashed_groups (
+    group_uuid character varying,
+    trash_at timestamp without time zone
+);
+
+
 --
 -- Name: users_id_seq; Type: SEQUENCE; Schema: public; Owner: -
 --
@@ -1277,13 +1456,6 @@ ALTER TABLE ONLY public.logs ALTER COLUMN id SET DEFAULT nextval('public.logs_id
 ALTER TABLE ONLY public.nodes ALTER COLUMN id SET DEFAULT nextval('public.nodes_id_seq'::regclass);
 
 
---
--- Name: permission_refresh_lock id; Type: DEFAULT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.permission_refresh_lock ALTER COLUMN id SET DEFAULT nextval('public.permission_refresh_lock_id_seq'::regclass);
-
-
 --
 -- Name: pipeline_instances id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -1468,14 +1640,6 @@ ALTER TABLE ONLY public.nodes
     ADD CONSTRAINT nodes_pkey PRIMARY KEY (id);
 
 
---
--- Name: permission_refresh_lock permission_refresh_lock_pkey; Type: CONSTRAINT; Schema: public; Owner: -
---
-
-ALTER TABLE ONLY public.permission_refresh_lock
-    ADD CONSTRAINT permission_refresh_lock_pkey PRIMARY KEY (id);
-
-
 --
 -- Name: pipeline_instances pipeline_instances_pkey; Type: CONSTRAINT; Schema: public; Owner: -
 --
@@ -2513,6 +2677,13 @@ CREATE INDEX index_traits_on_owner_uuid ON public.traits USING btree (owner_uuid
 CREATE UNIQUE INDEX index_traits_on_uuid ON public.traits USING btree (uuid);
 
 
+--
+-- Name: index_trashed_groups_on_group_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE UNIQUE INDEX index_trashed_groups_on_group_uuid ON public.trashed_groups USING btree (group_uuid);
+
+
 --
 -- Name: index_users_on_created_at; Type: INDEX; Schema: public; Owner: -
 --
@@ -2703,17 +2874,17 @@ CREATE INDEX nodes_search_index ON public.nodes USING btree (uuid, owner_uuid, m
 
 
 --
--- Name: permission_target_trashed; Type: INDEX; Schema: public; Owner: -
+-- Name: permission_target; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX permission_target_trashed ON public.materialized_permission_view USING btree (trashed, target_uuid);
+CREATE INDEX permission_target ON public.materialized_permissions USING btree (target_uuid);
 
 
 --
--- Name: permission_target_user_trashed_level; Type: INDEX; Schema: public; Owner: -
+-- Name: permission_user_target; Type: INDEX; Schema: public; Owner: -
 --
 
-CREATE INDEX permission_target_user_trashed_level ON public.materialized_permission_view USING btree (user_uuid, trashed, perm_level);
+CREATE UNIQUE INDEX permission_user_target ON public.materialized_permissions USING btree (user_uuid, target_uuid);
 
 
 --
@@ -3024,6 +3195,7 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20190523180148'),
 ('20190808145904'),
 ('20190809135453'),
-('20190905151603');
+('20190905151603'),
+('20200501150153');
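
Because permission_graph_edges is a plain (non-materialized) view, it is inlined into any query that uses it, letting the planner push constraints down to the underlying tables. A hedged probe of one object's outbound edges (uuid invented):

    ActiveRecord::Base.connection.exec_query(
      "select head_uuid, val, edge_id from permission_graph_edges where tail_uuid = $1",
      "edges_from_object",
      [[nil, "zzzzz-tpzed-user000000000aa"]])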
 
 
diff --git a/services/api/lib/20200501150153_permission_table_constants.rb b/services/api/lib/20200501150153_permission_table_constants.rb
new file mode 100644 (file)
index 0000000..6e43a62
--- /dev/null
@@ -0,0 +1,85 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# These constants are used in both
+# db/migrate/20200501150153_permission_table and update_permissions
+#
+# This file allows them to be easily imported by both to avoid duplication.
+#
+# Don't mess with this!  Any changes will affect both the current
+# update_permissions and the past migration.  If you are tinkering
+# with the permission system and need to change how
+# PERM_QUERY_TEMPLATE, refresh_trashed, or refresh_permissions work,
+# you should make a new file with your modified functions and have
+# update_permissions reference that file instead.
+
+PERMISSION_VIEW = "materialized_permissions"
+
+TRASHED_GROUPS = "trashed_groups"
+
+# We need to use this parameterized query in a few different places,
+# including as a subquery in a larger query.
+#
+# There are basically two options.  The way I did this originally was
+# to put this in a postgres function and do a lateral join over it.
+# However, postgres functions impose an optimization barrier, and
+# possibly have other overhead with temporary tables, so I ended up
+# going with the brute force approach of inlining the whole thing.
+#
+# The two substitutions are "base_case" which determines the initial
+# set of permission origins and "edge_perm" which is used to ensure
+# that the new permission takes precedence over the one in the edges
+# table (but some queries don't need that.)
+#
+PERM_QUERY_TEMPLATE = %{
+WITH RECURSIVE
+        traverse_graph(origin_uuid, target_uuid, val, traverse_owned, starting_set) as (
+            %{base_case}
+          union
+            (select traverse_graph.origin_uuid,
+                    edges.head_uuid,
+                      least(%{edge_perm},
+                            traverse_graph.val),
+                    should_traverse_owned(edges.head_uuid, edges.val),
+                    false
+             from permission_graph_edges as edges, traverse_graph
+             where traverse_graph.target_uuid = edges.tail_uuid
+             and (edges.tail_uuid like '_____-j7d0g-_______________' or
+                  traverse_graph.starting_set)))
+        select traverse_graph.origin_uuid, target_uuid, max(val) as val, bool_or(traverse_owned) as traverse_owned from traverse_graph
+        group by (traverse_graph.origin_uuid, target_uuid)
+}
+
+def refresh_trashed
+  ActiveRecord::Base.transaction do
+    ActiveRecord::Base.connection.execute("LOCK TABLE #{TRASHED_GROUPS}")
+    ActiveRecord::Base.connection.execute("DELETE FROM #{TRASHED_GROUPS}")
+
+    # Populate the trashed_groups table.  This starts with each group
+    # owned by a user and computes the subtree under that group to
+    # find any groups that are trashed.
+    ActiveRecord::Base.connection.execute(%{
+INSERT INTO #{TRASHED_GROUPS}
+select ps.target_uuid as group_uuid, ps.trash_at from groups,
+  lateral project_subtree_with_trash_at(groups.uuid, groups.trash_at) ps
+  where groups.owner_uuid like '_____-tpzed-_______________'
+})
+  end
+end
+
+def refresh_permissions
+  ActiveRecord::Base.transaction do
+    ActiveRecord::Base.connection.execute("LOCK TABLE #{PERMISSION_VIEW}")
+    ActiveRecord::Base.connection.execute("DELETE FROM #{PERMISSION_VIEW}")
+
+    ActiveRecord::Base.connection.execute %{
+INSERT INTO materialized_permissions
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+        select uuid, uuid, 3, true, true from users
+},
+:edge_perm => 'edges.val'
+} }
+}, "refresh_permission_view.do"
+  end
+end
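
refresh_trashed and refresh_permissions are full from-scratch rebuilds, used by the migration and suitable for repair work. A hedged console sketch:

    # e.g. after manual SQL surgery on links or groups during support work:
    require '20200501150153_permission_table_constants'
    refresh_trashed       # repopulate trashed_groups from the group tree
    refresh_permissions   # repopulate materialized_permissions from all users
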
diff --git a/services/api/lib/refresh_permission_view.rb b/services/api/lib/refresh_permission_view.rb
deleted file mode 100644 (file)
index 5d6081f..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-PERMISSION_VIEW = "materialized_permission_view"
-
-def do_refresh_permission_view
-  ActiveRecord::Base.transaction do
-    ActiveRecord::Base.connection.execute("LOCK TABLE permission_refresh_lock")
-    ActiveRecord::Base.connection.execute("REFRESH MATERIALIZED VIEW #{PERMISSION_VIEW}")
-  end
-end
-
-def refresh_permission_view(async=false)
-  if async and Rails.configuration.API.AsyncPermissionsUpdateInterval > 0
-    exp = Rails.configuration.API.AsyncPermissionsUpdateInterval.seconds
-    need = false
-    Rails.cache.fetch('AsyncRefreshPermissionView', expires_in: exp) do
-      need = true
-    end
-    if need
-      # Schedule a new permission update and return immediately
-      Thread.new do
-        Thread.current.abort_on_exception = false
-        begin
-          sleep(exp)
-          Rails.cache.delete('AsyncRefreshPermissionView')
-          do_refresh_permission_view
-        rescue => e
-          Rails.logger.error "Updating permission view: #{e}\n#{e.backtrace.join("\n\t")}"
-        ensure
-          ActiveRecord::Base.connection.close
-        end
-      end
-      true
-    end
-  else
-    do_refresh_permission_view
-  end
-end
diff --git a/services/api/lib/update_permissions.rb b/services/api/lib/update_permissions.rb
new file mode 100644 (file)
index 0000000..4c2e72d
--- /dev/null
@@ -0,0 +1,205 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require '20200501150153_permission_table_constants'
+
+REVOKE_PERM = 0
+CAN_MANAGE_PERM = 3
+
+def update_permissions perm_origin_uuid, starting_uuid, perm_level, edge_id=nil
+  #
+  # Update a subset of the permission table affected by adding or
+  # removing a particular permission relationship (ownership or a
+  # permission link).
+  #
+  # perm_origin_uuid: This is the object that 'gets' the permission.
+  # It is the owner_uuid or tail_uuid.
+  #
+  # starting_uuid: The object we are computing permission for (or head_uuid)
+  #
+  # perm_level: The level of permission that perm_origin_uuid gets for starting_uuid.
+  #
+  # perm_level is a number from 0-3
+  #   can_read=1
+  #   can_write=2
+  #   can_manage=3
+  #   or call with perm_level=0 to revoke permissions
+  #
+  # check: for testing/debugging, compare the result of the
+  # incremental update against a full table recompute.  Throws an
+  # error if the contents are not identical (i.e. they produce
+  # different permission results).
+
+  # Theory of operation
+  #
+  # Given a change in a specific permission relationship, we recompute
+  # the set of permissions (for all users) that could possibly be
+  # affected by that relationship.  For example, if a project is
+  # shared with another user, we recompute all permissions for all
+  # projects in the hierarchy.  This returns a set of updated
+  # permissions, which we stash in a temporary table.
+  #
+  # Then, for each user_uuid/target_uuid in the updated permissions
+  # result set we insert/update a permission row in
+  # materialized_permissions, and delete any rows that exist in
+  # materialized_permissions that are not in the result set or have
+  # perm_level=0.
+  #
+  # see db/migrate/20200501150153_permission_table.rb for details on
+  # how the permissions are computed.
+
+  if edge_id.nil?
+    # For changes of ownership, edge_id is starting_uuid.  It turns
+    # out most invocations of update_permissions are for changes of
+    # ownership, so make this parameter optional to reduce
+    # clutter.
+    # For permission links, the uuid of the link object will be passed in for edge_id.
+    edge_id = starting_uuid
+  end
+
+  ActiveRecord::Base.transaction do
+
+    # "Conflicts with the ROW EXCLUSIVE, SHARE UPDATE EXCLUSIVE, SHARE
+    # ROW EXCLUSIVE, EXCLUSIVE, and ACCESS EXCLUSIVE lock modes. This
+    # mode protects a table against concurrent data changes."
+    ActiveRecord::Base.connection.execute "LOCK TABLE #{PERMISSION_VIEW} in SHARE MODE"
+
+    # Workaround for
+    # BUG #15160: planner overestimates number of rows in join when there are more than 200 rows coming from CTE
+    # https://www.postgresql.org/message-id/152395805004.19366.3107109716821067806@wrigleys.postgresql.org
+    #
+    # For a crucial join in the compute_permission_subgraph() query, the
+    # planner mis-estimates the number of rows in a Common Table
+    # Expression (CTE, this is a subquery in a WITH clause) and as a
+    # result it chooses the wrong join order.  The join starts with the
+    # permissions table because it mistakenly thinks
+    # count(materialized_permissions) < count(new computed permissions)
+    # when actually it is the other way around.
+    #
+    # Because of the incorrect join order, it chooses the wrong join
+    # strategy (merge join, which works best when two tables are roughly
+    # the same size).  As a workaround, we can tell it not to use that
+    # join strategy, this causes it to pick hash join instead, which
+    # turns out to be a bit better.  However, because the join order is
+    # still wrong, we don't get the full benefit of the index.
+    #
+    # This is very unfortunate because it makes the query performance
+    # dependent on the size of the materialized_permissions table, when
+    # the goal of this design was to make permission updates scale-free
+    # and only depend on the number of permissions affected and not the
+    # total table size.  In several hours of researching I wasn't able
+    # to find a way to force the correct join order, so I'm calling it
+    # here and I have to move on.
+    #
+    # This is apparently addressed in Postgres 12, but I developed &
+    # tested this on Postgres 9.6, so in the future we should reevaluate
+    # the performance & query plan on Postgres 12.
+    #
+    # https://git.furworks.de/opensourcemirror/postgresql/commit/a314c34079cf06d05265623dd7c056f8fa9d577f
+    #
+    # Disable merge join for just this query (also local for this transaction), then reenable it.
+    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to false;"
+
+    temptable_perms = "temp_perms_#{rand(2**64).to_s(10)}"
+    ActiveRecord::Base.connection.exec_query %{
+create temporary table #{temptable_perms} on commit drop
+as select * from compute_permission_subgraph($1, $2, $3, $4)
+},
+                                             'update_permissions.select',
+                                             [[nil, perm_origin_uuid],
+                                              [nil, starting_uuid],
+                                              [nil, perm_level],
+                                              [nil, edge_id]]
+
+    ActiveRecord::Base.connection.exec_query "SET LOCAL enable_mergejoin to true;"
+
+    ActiveRecord::Base.connection.exec_delete %{
+delete from #{PERMISSION_VIEW} where
+  target_uuid in (select target_uuid from #{temptable_perms}) and
+  not exists (select 1 from #{temptable_perms}
+              where target_uuid=#{PERMISSION_VIEW}.target_uuid and
+                    user_uuid=#{PERMISSION_VIEW}.user_uuid and
+                    val>0)
+},
+                                              "update_permissions.delete"
+
+    ActiveRecord::Base.connection.exec_query %{
+insert into #{PERMISSION_VIEW} (user_uuid, target_uuid, perm_level, traverse_owned)
+  select user_uuid, target_uuid, val as perm_level, traverse_owned from #{temptable_perms} where val>0
+on conflict (user_uuid, target_uuid) do update set perm_level=EXCLUDED.perm_level, traverse_owned=EXCLUDED.traverse_owned;
+},
+                                             "update_permissions.insert"
+
+    if perm_level>0
+      check_permissions_against_full_refresh
+    end
+  end
+end
+
+
+def check_permissions_against_full_refresh
+  # No-op except when running tests
+  return unless Rails.env == 'test' and !Thread.current[:no_check_permissions_against_full_refresh]
+
+  # For checking correctness of the incremental permission updates.
+  # Check contents of the current 'materialized_permissions' table
+  # against a from-scratch permission refresh.
+
+  q1 = ActiveRecord::Base.connection.exec_query %{
+select user_uuid, target_uuid, perm_level, traverse_owned from #{PERMISSION_VIEW}
+order by user_uuid, target_uuid
+}, "check_permissions_against_full_refresh.permission_table"
+
+  q2 = ActiveRecord::Base.connection.exec_query %{
+    select pq.origin_uuid as user_uuid, target_uuid, pq.val as perm_level, pq.traverse_owned from (
+    #{PERM_QUERY_TEMPLATE % {:base_case => %{
+        select uuid, uuid, 3, true, true from users
+},
+:edge_perm => 'edges.val'
+} }) as pq order by origin_uuid, target_uuid
+}, "check_permissions_against_full_refresh.full_recompute"
+
+  if q1.count != q2.count
+    puts "Didn't match incremental+: #{q1.count} != full refresh-: #{q2.count}"
+  end
+
+  if q1.count > q2.count
+    q1.each_with_index do |r, i|
+      if r != q2[i]
+        puts "+#{r}\n-#{q2[i]}"
+        raise "Didn't match"
+      end
+    end
+  else
+    q2.each_with_index do |r, i|
+      if r != q1[i]
+        puts "+#{q1[i]}\n-#{r}"
+        raise "Didn't match"
+      end
+    end
+  end
+end
+
+def skip_check_permissions_against_full_refresh
+  check_perm_was = Thread.current[:no_check_permissions_against_full_refresh]
+  Thread.current[:no_check_permissions_against_full_refresh] = true
+  begin
+    yield
+  ensure
+    Thread.current[:no_check_permissions_against_full_refresh] = check_perm_was
+  end
+end
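A usage sketch, mirroring the owner_test.rb changes later in this commit: wrap an operation that transiently breaks the permission invariant, then re-run the check once the state should be consistent again.

    # Hypothetical test fragment: `doomed` is assumed to be a User that
    # owns itself, the case the full-refresh check would otherwise flag
    # mid-operation.
    skip_check_permissions_against_full_refresh do
      doomed.destroy
    end
    check_permissions_against_full_refresh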
+
+# Used to account for permissions that a user gains by having
+# can_manage on another user.
+#
+# Note: in theory a user could have can_manage access to a user
+# through multiple levels; that isn't handled here (it would require a
+# recursive query).  I think that's okay because users getting
+# transitive access through "can_manage" on a user is a rarely/never
+# used feature and something we probably want to deprecate and remove.
+USER_UUIDS_SUBQUERY_TEMPLATE = %{
+select target_uuid from materialized_permissions where user_uuid in (%{user})
+and target_uuid like '_____-tpzed-_______________' and traverse_owned=true and perm_level >= %{perm_level}
+}
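A sketch of how the template is presumably filled in: the %{user} slot takes a quoted uuid (or a SQL fragment yielding uuids) and %{perm_level} an integer, with 3 meaning can_manage. The uuid below is a placeholder.

    sql = USER_UUIDS_SUBQUERY_TEMPLATE % {
      user: "'zzzzz-tpzed-xxxxxxxxxxxxxxx'",
      perm_level: 3,
    }
    # Uuids of user records reachable from that user with can_manage.
    manageable = ActiveRecord::Base.connection.exec_query(sql).rows.flatten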
index 92a1ced52841942b60f3898a58b5818d53b3b14f..22c999ecd752ecea18160b807f5950ee94a8a521 100644 (file)
@@ -60,6 +60,7 @@ testusergroup_admins:
   uuid: zzzzz-j7d0g-48foin4vonvc2at
   owner_uuid: zzzzz-tpzed-000000000000000
   name: Administrators of a subset of users
+  group_class: role
 
 aproject:
   uuid: zzzzz-j7d0g-v955i6s2oi1cbso
@@ -143,6 +144,7 @@ active_user_has_can_manage:
   uuid: zzzzz-j7d0g-ptt1ou6a9lxrv07
   owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
   name: Active user has can_manage
+  group_class: project
 
 # Group for testing granting permission between users who share a group.
 group_for_sharing_tests:
@@ -343,4 +345,4 @@ trashed_on_next_sweep:
   trash_at: 2001-01-01T00:00:00Z
   delete_at: 2038-03-01T00:00:00Z
   is_trashed: false
-  modified_at: 2001-01-01T00:00:00Z
\ No newline at end of file
+  modified_at: 2001-01-01T00:00:00Z
index 57633a31203f6ee0b9c8150324ce93a022f4055d..14630d9efa85615a09585082299290b71def8530 100644 (file)
@@ -418,3 +418,17 @@ double_redirects_to_active:
       organization: example.com
       role: Computational biologist
     getting_started_shown: 2015-03-26 12:34:56.789000000 Z
+
+has_can_login_permission:
+  owner_uuid: zzzzz-tpzed-000000000000000
+  uuid: zzzzz-tpzed-xabcdjxw79nv3jz
+  email: can-login-user@arvados.local
+  modified_by_client_uuid: zzzzz-ozdt8-teyxzyd8qllg11h
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  first_name: Can_login
+  last_name: User
+  identity_url: https://can-login-user.openid.local
+  is_active: true
+  is_admin: false
+  modified_at: 2015-03-26 12:34:56.789000000 Z
+  username: can-login-user
index 30ab89c7e2aa4527960e518cdf63a95bbaef4550..2b5e8d5a9d099947adbb21bec74c8508fd456d16 100644 (file)
@@ -505,9 +505,19 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
 
   ### trashed project tests ###
 
-  [:active, :admin].each do |auth|
+  #
+  # The structure is:
+  #
+  # trashed_project         (zzzzz-j7d0g-trashedproject1)
+  #   trashed_subproject    (zzzzz-j7d0g-trashedproject2)
+  #   trashed_subproject3   (zzzzz-j7d0g-trashedproject3)
+  #   zzzzz-xvhdp-cr5trashedcontr
+
+  [:active,
+   :admin].each do |auth|
     # project: to query,    to untrash,    is visible, parent contents listing success
-    [[:trashed_project,     [],                 false, true],
+    [
+     [:trashed_project,     [],                 false, true],
      [:trashed_project,     [:trashed_project], true,  true],
      [:trashed_subproject,  [],                 false, false],
      [:trashed_subproject,  [:trashed_project], true,  true],
index eb97fc1f49034165e6ae02a1896b116a3e835890..445670a3d51572827289bb0dc37430168cd4eb9f 100644 (file)
@@ -193,11 +193,15 @@ class NonTransactionalGroupsTest < ActionDispatch::IntegrationTest
     assert_response :success
   end
 
-  test "create request with async=true defers permissions update" do
+  test "create request with async=true does not defer permissions update" do
     Rails.configuration.API.AsyncPermissionsUpdateInterval = 1 # second
     name = "Random group #{rand(1000)}"
     assert_equal nil, Group.find_by_name(name)
 
+    # Following the implementation of incremental permission updates
+    # (#16007), the async flag is now a no-op.  Permission changes are
+    # visible immediately.
+
     # Trigger the asynchronous permission update by using async=true parameter.
     post "/arvados/v1/groups",
       params: {
@@ -209,7 +213,7 @@ class NonTransactionalGroupsTest < ActionDispatch::IntegrationTest
       headers: auth(:active)
     assert_response 202
 
-    # The group exists on the database, but it's not accessible yet.
+    # The group exists in the database and, since permission updates
+    # are no longer deferred, is immediately visible.
     assert_not_nil Group.find_by_name(name)
     get "/arvados/v1/groups",
       params: {
@@ -218,7 +222,7 @@ class NonTransactionalGroupsTest < ActionDispatch::IntegrationTest
       },
       headers: auth(:active)
     assert_response 200
-    assert_equal 0, json_response['items_available']
+    assert_equal 1, json_response['items_available']
 
     # Wait a bit and try again.
     sleep(1)
index a0605f97e72c1a749ccec12b46cd1a406417e2f0..d0e6413b16ab195b0e8fe03ba865abba0a2b3541 100644 (file)
@@ -40,7 +40,7 @@ class PermissionPerfTest < ActionDispatch::IntegrationTest
                    end
                  end
                end
-               User.invalidate_permissions_cache
+               refresh_permissions
              end
            end)
     end
index 5747a85cf598965d20b563c918a304b01f9dce87..c99a57aaff49b24910df17c4a745088a4903ce22 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
+require 'update_permissions'
+
 ENV["RAILS_ENV"] = "test"
 unless ENV["NO_COVERAGE_TEST"]
   begin
@@ -207,4 +209,5 @@ class ActionDispatch::IntegrationTest
 end
 
 # Ensure permissions are computed from the test fixtures.
-User.invalidate_permissions_cache
+refresh_permissions
+refresh_trashed
index bf1ba517ebcb6bf26aec3027a084fee086ff810b..addea83062404b84baf1911d6b81a262d582ce05 100644 (file)
@@ -1000,6 +1000,19 @@ class CollectionTest < ActiveSupport::TestCase
   test "delete referring links in SweepTrashedObjects" do
     uuid = collections(:trashed_on_next_sweep).uuid
     act_as_system_user do
+      assert_raises ActiveRecord::RecordInvalid do
+        # Cannot create because :trashed_on_next_sweep is already trashed
+        Link.create!(head_uuid: uuid,
+                     tail_uuid: system_user_uuid,
+                     link_class: 'whatever',
+                     name: 'something')
+      end
+
+      # Bump trash_at to now + 1 minute
+      Collection.where(uuid: uuid).
+        update(trash_at: db_current_time + (1).minute)
+
+      # Not considered trashed now
       Link.create!(head_uuid: uuid,
                    tail_uuid: system_user_uuid,
                    link_class: 'whatever',
index 528c6d253f49b9d356a3a7c857e2117690ecd228..ca02e2db5e3a08a8fb3ecdd79222e2835f8a5e2d 100644 (file)
@@ -70,8 +70,12 @@ class OwnerTest < ActiveSupport::TestCase
              "new #{o_class} should really be in DB")
       old_uuid = o.uuid
       new_uuid = o.uuid.sub(/..........$/, rand(2**256).to_s(36)[0..9])
-      assert(o.update_attributes(uuid: new_uuid),
-             "should change #{o_class} uuid from #{old_uuid} to #{new_uuid}")
+      if o.respond_to? :update_uuid
+        o.update_uuid(new_uuid: new_uuid)
+      else
+        assert(o.update_attributes(uuid: new_uuid),
+               "should change #{o_class} uuid from #{old_uuid} to #{new_uuid}")
+      end
       assert_equal(false, o_class.where(uuid: old_uuid).any?,
                    "#{old_uuid} should disappear when renamed to #{new_uuid}")
     end
@@ -83,9 +87,11 @@ class OwnerTest < ActiveSupport::TestCase
       assert_equal(true, Specimen.where(owner_uuid: o.uuid).any?,
                    "need something to be owned by #{o.uuid} for this test")
 
-      assert_raises(ActiveRecord::DeleteRestrictionError,
-                    "should not delete #{ofixt} that owns objects") do
-        o.destroy
+      skip_check_permissions_against_full_refresh do
+        assert_raises(ActiveRecord::DeleteRestrictionError,
+                      "should not delete #{ofixt} that owns objects") do
+          o.destroy
+        end
       end
     end
 
@@ -104,9 +110,14 @@ class OwnerTest < ActiveSupport::TestCase
     assert User.where(uuid: o.uuid).any?, "new User should really be in DB"
     assert_equal(true, o.update_attributes(owner_uuid: o.uuid),
                  "setting owner to self should work")
-    assert(o.destroy, "should delete User that owns self")
+
+    skip_check_permissions_against_full_refresh do
+      assert(o.destroy, "should delete User that owns self")
+    end
+
     assert_equal(false, User.where(uuid: o.uuid).any?,
                  "#{o.uuid} should not be in DB after deleting")
+    check_permissions_against_full_refresh
   end
 
   test "change uuid of User that owns self" do
@@ -116,8 +127,8 @@ class OwnerTest < ActiveSupport::TestCase
                  "setting owner to self should work")
     old_uuid = o.uuid
     new_uuid = o.uuid.sub(/..........$/, rand(2**256).to_s(36)[0..9])
-    assert(o.update_attributes(uuid: new_uuid),
-           "should change uuid of User that owns self")
+    o.update_uuid(new_uuid: new_uuid)
+    o = User.find_by_uuid(new_uuid)
     assert_equal(false, User.where(uuid: old_uuid).any?,
                  "#{old_uuid} should not be in DB after deleting")
     assert_equal(true, User.where(uuid: new_uuid).any?,
index 18d2fbbcb5f7cef9d87cc71df1077b5a4c8cf1d6..cb5ae7ba2f2367462229927e364e640913727141 100644 (file)
@@ -10,7 +10,7 @@ class PermissionTest < ActiveSupport::TestCase
   test "Grant permissions on an object I own" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create
+    ob = Collection.create
     assert ob.save
 
     # Ensure I have permission to manage this group even when its owner changes
@@ -24,7 +24,7 @@ class PermissionTest < ActiveSupport::TestCase
   test "Delete permission links when deleting an object" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create!
+    ob = Collection.create!
     Link.create!(tail_uuid: users(:active).uuid,
                  head_uuid: ob.uuid,
                  link_class: 'permission',
@@ -37,7 +37,7 @@ class PermissionTest < ActiveSupport::TestCase
 
   test "permission links owned by root" do
     set_user_from_auth :active_trustedclient
-    ob = Specimen.create!
+    ob = Collection.create!
     perm_link = Link.create!(tail_uuid: users(:active).uuid,
                              head_uuid: ob.uuid,
                              link_class: 'permission',
@@ -48,18 +48,18 @@ class PermissionTest < ActiveSupport::TestCase
   test "readable_by" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create!
+    ob = Collection.create!
     Link.create!(tail_uuid: users(:active).uuid,
                  head_uuid: ob.uuid,
                  link_class: 'permission',
                  name: 'can_read')
-    assert Specimen.readable_by(users(:active)).where(uuid: ob.uuid).any?, "user does not have read permission"
+    assert Collection.readable_by(users(:active)).where(uuid: ob.uuid).any?, "user does not have read permission"
   end
 
   test "writable_by" do
     set_user_from_auth :active_trustedclient
 
-    ob = Specimen.create!
+    ob = Collection.create!
     Link.create!(tail_uuid: users(:active).uuid,
                  head_uuid: ob.uuid,
                  link_class: 'permission',
@@ -67,6 +67,34 @@ class PermissionTest < ActiveSupport::TestCase
     assert ob.writable_by.include?(users(:active).uuid), "user does not have write permission"
   end
 
+  test "update permission link" do
+    set_user_from_auth :admin
+
+    grp = Group.create! name: "blah project", group_class: "project"
+    ob = Collection.create! owner_uuid: grp.uuid
+
+    assert !users(:active).can?(write: ob)
+    assert !users(:active).can?(read: ob)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_write')
+
+    assert users(:active).can?(write: ob)
+    assert users(:active).can?(read: ob)
+
+    l1.update_attributes!(name: 'can_read')
+
+    assert !users(:active).can?(write: ob)
+    assert users(:active).can?(read: ob)
+
+    l1.destroy
+
+    assert !users(:active).can?(write: ob)
+    assert !users(:active).can?(read: ob)
+  end
+
   test "writable_by reports requesting user's own uuid for a writable project" do
     invited_to_write = users(:project_viewer)
     group = groups(:asubproject)
@@ -124,16 +152,16 @@ class PermissionTest < ActiveSupport::TestCase
   test "user owns group, group can_manage object's group, user can add permissions" do
     set_user_from_auth :admin
 
-    owner_grp = Group.create!(owner_uuid: users(:active).uuid)
-
-    sp_grp = Group.create!
-    sp = Specimen.create!(owner_uuid: sp_grp.uuid)
+    owner_grp = Group.create!(owner_uuid: users(:active).uuid, group_class: "role")
+    sp_grp = Group.create!(group_class: "project")
 
     Link.create!(link_class: 'permission',
                  name: 'can_manage',
                  tail_uuid: owner_grp.uuid,
                  head_uuid: sp_grp.uuid)
 
+    sp = Collection.create!(owner_uuid: sp_grp.uuid)
+
     # active user owns owner_grp, which has can_manage permission on sp_grp
     # user should be able to add permissions on sp.
     set_user_from_auth :active_trustedclient
@@ -149,7 +177,7 @@ class PermissionTest < ActiveSupport::TestCase
   skip "can_manage permission on a non-group object" do
     set_user_from_auth :admin
 
-    ob = Specimen.create!
+    ob = Collection.create!
     # grant can_manage permission to active
     perm_link = Link.create!(tail_uuid: users(:active).uuid,
                              head_uuid: ob.uuid,
@@ -170,7 +198,7 @@ class PermissionTest < ActiveSupport::TestCase
   test "user without can_manage permission may not modify permission link" do
     set_user_from_auth :admin
 
-    ob = Specimen.create!
+    ob = Collection.create!
     # grant can_manage permission to active
     perm_link = Link.create!(tail_uuid: users(:active).uuid,
                              head_uuid: ob.uuid,
@@ -192,7 +220,8 @@ class PermissionTest < ActiveSupport::TestCase
     manager = create :active_user, first_name: "Manage", last_name: "Er"
     minion = create :active_user, first_name: "Min", last_name: "Ion"
     minions_specimen = act_as_user minion do
-      Specimen.create!
+      g = Group.create! name: "minion project", group_class: "project"
+      Collection.create! owner_uuid: g.uuid
     end
     # Manager creates a group. (Make sure it doesn't magically give
     # anyone any additional permissions.)
@@ -255,7 +284,7 @@ class PermissionTest < ActiveSupport::TestCase
         create(:permission_link,
                name: 'can_manage', tail_uuid: manager.uuid, head_uuid: minion.uuid)
       end
-      assert_empty(Specimen
+      assert_empty(Collection
                      .readable_by(manager)
                      .where(uuid: minions_specimen.uuid),
                    "manager saw the minion's private stuff")
@@ -273,7 +302,7 @@ class PermissionTest < ActiveSupport::TestCase
 
     act_as_user manager do
       # Now, manager can read and write Minion's stuff.
-      assert_not_empty(Specimen
+      assert_not_empty(Collection
                          .readable_by(manager)
                          .where(uuid: minions_specimen.uuid),
                        "manager could not find minion's specimen by uuid")
@@ -309,12 +338,12 @@ class PermissionTest < ActiveSupport::TestCase
                      "#{a.first_name} should be able to see 'b' in the user list")
 
     a_specimen = act_as_user a do
-      Specimen.create!
+      Collection.create!
     end
-    assert_not_empty(Specimen.readable_by(a).where(uuid: a_specimen.uuid),
-                     "A cannot read own Specimen, following test probably useless.")
-    assert_empty(Specimen.readable_by(b).where(uuid: a_specimen.uuid),
-                 "B can read A's Specimen")
+    assert_not_empty(Collection.readable_by(a).where(uuid: a_specimen.uuid),
+                     "A cannot read own Collection, following test probably useless.")
+    assert_empty(Collection.readable_by(b).where(uuid: a_specimen.uuid),
+                 "B can read A's Collection")
     [a,b].each do |u|
       assert_empty(User.readable_by(u).where(uuid: other.uuid),
                    "#{u.first_name} can see OTHER in the user list")
@@ -341,13 +370,13 @@ class PermissionTest < ActiveSupport::TestCase
   test "cannot create with owner = unwritable user" do
     set_user_from_auth :rominiadmin
     assert_raises ArvadosModel::PermissionDeniedError, "created with owner = unwritable user" do
-      Specimen.create!(owner_uuid: users(:active).uuid)
+      Collection.create!(owner_uuid: users(:active).uuid)
     end
   end
 
   test "cannot change owner to unwritable user" do
     set_user_from_auth :rominiadmin
-    ob = Specimen.create!
+    ob = Collection.create!
     assert_raises ArvadosModel::PermissionDeniedError, "changed owner to unwritable user" do
       ob.update_attributes!(owner_uuid: users(:active).uuid)
     end
@@ -356,13 +385,13 @@ class PermissionTest < ActiveSupport::TestCase
   test "cannot create with owner = unwritable group" do
     set_user_from_auth :rominiadmin
     assert_raises ArvadosModel::PermissionDeniedError, "created with owner = unwritable group" do
-      Specimen.create!(owner_uuid: groups(:aproject).uuid)
+      Collection.create!(owner_uuid: groups(:aproject).uuid)
     end
   end
 
   test "cannot change owner to unwritable group" do
     set_user_from_auth :rominiadmin
-    ob = Specimen.create!
+    ob = Collection.create!
     assert_raises ArvadosModel::PermissionDeniedError, "changed owner to unwritable group" do
       ob.update_attributes!(owner_uuid: groups(:aproject).uuid)
     end
@@ -390,4 +419,159 @@ class PermissionTest < ActiveSupport::TestCase
 
     assert_not_empty container_logs(:running_older, :anonymous)
   end
+
+  test "add user to group, then remove them" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "role")
+    col = Collection.create!(owner_uuid: grp.uuid)
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    l3 = Link.create!(tail_uuid: users(:project_viewer).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+    l4 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:project_viewer).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid).first
+
+    l1.destroy
+    l2.destroy
+
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+  end
+
+
+  test "add user to group, then change permission level" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "role")
+    col = Collection.create!(owner_uuid: grp.uuid)
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_manage')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+
+    l1.name = 'can_read'
+    l1.save!
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert !users(:active).can?(write: col.uuid)
+    assert !users(:active).can?(manage: col.uuid)
+
+    l1.name = 'can_write'
+    l1.save!
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert !users(:active).can?(manage: col.uuid)
+  end
+
+
+  test "add user to group, then add overlapping permission link to group" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "role")
+    col = Collection.create!(owner_uuid: grp.uuid)
+    assert_empty Collection.readable_by(users(:active)).where(uuid: col.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_manage')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+
+    l3 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+
+    l3.destroy!
+
+    assert Collection.readable_by(users(:active)).where(uuid: col.uuid).first
+    assert users(:active).can?(read: col.uuid)
+    assert users(:active).can?(write: col.uuid)
+    assert users(:active).can?(manage: col.uuid)
+  end
+
+
+  test "add user to group, then add overlapping permission link to subproject" do
+    set_user_from_auth :admin
+    grp = Group.create!(owner_uuid: system_user_uuid, group_class: "project")
+    prj = Group.create!(owner_uuid: grp.uuid, group_class: "project")
+    assert_empty Group.readable_by(users(:active)).where(uuid: prj.uuid)
+    assert_empty User.readable_by(users(:active)).where(uuid: users(:project_viewer).uuid)
+
+    l1 = Link.create!(tail_uuid: users(:active).uuid,
+                 head_uuid: grp.uuid,
+                 link_class: 'permission',
+                 name: 'can_manage')
+    l2 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: users(:active).uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Group.readable_by(users(:active)).where(uuid: prj.uuid).first
+    assert users(:active).can?(read: prj.uuid)
+    assert users(:active).can?(write: prj.uuid)
+    assert users(:active).can?(manage: prj.uuid)
+
+    l3 = Link.create!(tail_uuid: grp.uuid,
+                 head_uuid: prj.uuid,
+                 link_class: 'permission',
+                 name: 'can_read')
+
+    assert Group.readable_by(users(:active)).where(uuid: prj.uuid).first
+    assert users(:active).can?(read: prj.uuid)
+    assert users(:active).can?(write: prj.uuid)
+    assert users(:active).can?(manage: prj.uuid)
+
+    l3.destroy!
+
+    assert Group.readable_by(users(:active)).where(uuid: prj.uuid).first
+    assert users(:active).can?(read: prj.uuid)
+    assert users(:active).can?(write: prj.uuid)
+    assert users(:active).can?(manage: prj.uuid)
+  end
 end
index 260795c12f8969333044f3c8917f6fe8cd2432e8..7fcd36d7091a4c7a00a9af6ae50f10f5a413871d 100644 (file)
@@ -165,7 +165,9 @@ class UserTest < ActiveSupport::TestCase
 
       if auto_admin_first_user_config
         # This test requires no admin users exist (except for the system user)
-        users(:admin).delete
+        act_as_system_user do
+          users(:admin).update_attributes!(is_admin: false)
+        end
         @all_users = User.where("uuid not like '%-000000000000000'").where(:is_admin => true)
         assert_equal 0, @all_users.count, "No admin users should exist (except for the system user)"
       end
@@ -476,15 +478,6 @@ class UserTest < ActiveSupport::TestCase
 
     vm = VirtualMachine.create
 
-    # Set up the bogus Link
-    bad_uuid = 'zzzzz-tpzed-xyzxyzxyzxyzxyz'
-
-    resp_link = Link.create ({tail_uuid: email, link_class: 'permission',
-        name: 'can_login', head_uuid: bad_uuid})
-    resp_link.save(validate: false)
-
-    verify_link resp_link, 'permission', 'can_login', email, bad_uuid
-
     response = user.setup(repo_name: 'foo/testrepo',
                           vm_uuid: vm.uuid)
 
index f1a8c292557b1ba89d9fa9b7b50ac519d718acb7..17ed6402ce0d79ab4dc1bddf30e6b0315df7fa16 100644 (file)
@@ -318,6 +318,57 @@ func (s *HandlerSuite) TestPutAndDeleteSkipReadonlyVolumes(c *check.C) {
        }
 }
 
+// Test TOUCH requests.
+func (s *HandlerSuite) TestTouchHandler(c *check.C) {
+       c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+       vols := s.handler.volmgr.AllWritable()
+       vols[0].Put(context.Background(), TestHash, TestBlock)
+       vols[0].Volume.(*MockVolume).TouchWithDate(TestHash, time.Now().Add(-time.Hour))
+       afterPut := time.Now()
+       t, err := vols[0].Mtime(TestHash)
+       c.Assert(err, check.IsNil)
+       c.Assert(t.Before(afterPut), check.Equals, true)
+
+       ExpectStatusCode(c,
+               "touch with no credentials",
+               http.StatusUnauthorized,
+               IssueRequest(s.handler, &RequestTester{
+                       method: "TOUCH",
+                       uri:    "/" + TestHash,
+               }))
+
+       ExpectStatusCode(c,
+               "touch with non-root credentials",
+               http.StatusUnauthorized,
+               IssueRequest(s.handler, &RequestTester{
+                       method:   "TOUCH",
+                       uri:      "/" + TestHash,
+                       apiToken: arvadostest.ActiveTokenV2,
+               }))
+
+       ExpectStatusCode(c,
+               "touch non-existent block",
+               http.StatusNotFound,
+               IssueRequest(s.handler, &RequestTester{
+                       method:   "TOUCH",
+                       uri:      "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+                       apiToken: s.cluster.SystemRootToken,
+               }))
+
+       beforeTouch := time.Now()
+       ExpectStatusCode(c,
+               "touch block",
+               http.StatusOK,
+               IssueRequest(s.handler, &RequestTester{
+                       method:   "TOUCH",
+                       uri:      "/" + TestHash,
+                       apiToken: s.cluster.SystemRootToken,
+               }))
+       t, err = vols[0].Mtime(TestHash)
+       c.Assert(err, check.IsNil)
+       c.Assert(t.After(beforeTouch), check.Equals, true)
+}
+
 // Test /index requests:
 //   - unauthenticated /index request
 //   - unauthenticated /index/prefix request
index 3d0f893d82fc20c8a01a833aa050f6203a1c70c7..eb0ea5ad2f133f3b8a569fa354255521f08c5965 100644 (file)
@@ -66,6 +66,8 @@ func MakeRESTRouter(ctx context.Context, cluster *arvados.Cluster, reg *promethe
        // List blocks stored here whose hash has the given prefix.
        // Privileged client only.
        rtr.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, rtr.handleIndex).Methods("GET", "HEAD")
+       // Update timestamp on existing block. Privileged client only.
+       rtr.HandleFunc(`/{hash:[0-9a-f]{32}}`, rtr.handleTOUCH).Methods("TOUCH")
 
        // Internals/debugging info (runtime.MemStats)
        rtr.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD")
@@ -191,6 +193,34 @@ func getBufferWithContext(ctx context.Context, bufs *bufferPool, bufSize int) ([
        }
 }
 
+func (rtr *router) handleTOUCH(resp http.ResponseWriter, req *http.Request) {
+       if !rtr.isSystemAuth(GetAPIToken(req)) {
+               http.Error(resp, UnauthorizedError.Error(), UnauthorizedError.HTTPCode)
+               return
+       }
+       hash := mux.Vars(req)["hash"]
+       vols := rtr.volmgr.AllWritable()
+       if len(vols) == 0 {
+               http.Error(resp, "no volumes", http.StatusNotFound)
+               return
+       }
+       var err error
+       for _, mnt := range vols {
+               err = mnt.Touch(hash)
+               if err == nil {
+                       break
+               }
+       }
+       switch {
+       case err == nil:
+               return
+       case os.IsNotExist(err):
+               http.Error(resp, err.Error(), http.StatusNotFound)
+       default:
+               http.Error(resp, err.Error(), http.StatusInternalServerError)
+       }
+}
+
 func (rtr *router) handlePUT(resp http.ResponseWriter, req *http.Request) {
        ctx, cancel := contextForResponse(context.TODO(), resp)
        defer cancel()
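On the client side, the new verb can be exercised with any HTTP library that allows custom methods. A sketch under stated assumptions: a keepstore at localhost:25107 (placeholder address), the block hash used in the handler test above, and the cluster's SystemRootToken exported in the environment.

    require 'net/http'

    # Net::HTTP has no built-in TOUCH verb; a request subclass supplies it.
    class TouchRequest < Net::HTTPRequest
      METHOD = 'TOUCH'
      REQUEST_HAS_BODY = false
      RESPONSE_HAS_BODY = true
    end

    uri = URI('http://localhost:25107/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
    req = TouchRequest.new(uri)
    req['Authorization'] = "Bearer #{ENV['SYSTEM_ROOT_TOKEN']}"
    res = Net::HTTP.start(uri.hostname, uri.port) { |http| http.request(req) }
    # 200: timestamp updated; 404: block missing; 401: token not accepted.
    puts res.code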
index 5026e2d32558e085886ba119cf0b664bfbc58473..1706473cc892c43cbd5ad27751c49f43cbebc075 100644 (file)
@@ -699,10 +699,20 @@ func (v *UnixVolume) EmptyTrash() {
        err := filepath.Walk(v.Root, func(path string, info os.FileInfo, err error) error {
                if err != nil {
                        v.logger.WithError(err).Errorf("EmptyTrash: filepath.Walk(%q) failed", path)
+                       // Don't give up -- keep walking other
+                       // files/dirs
                        return nil
+               } else if !info.Mode().IsDir() {
+                       todo <- dirent{path, info}
+                       return nil
+               } else if path == v.Root || blockDirRe.MatchString(info.Name()) {
+                       // Descend into a directory that we might have
+                       // put trash in.
+                       return nil
+               } else {
+                       // Don't descend into other dirs.
+                       return filepath.SkipDir
                }
-               todo <- dirent{path, info}
-               return nil
        })
        close(todo)
        wg.Wait()
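The intent of the new branches: regular files are always queued for trash inspection, while directory descent is limited to the volume root and the block directories. A sketch of the decision, assuming blockDirRe matches keepstore's three-hex-digit block subdirectories (the regexp itself is defined elsewhere in this file):

    # Assumed pattern for block subdirectories such as "aaa" or "0f3";
    # dot-dirs and any other names are skipped without descending.
    BLOCK_DIR_RE = /\A[0-9a-f]{3}\z/
    def descend_into?(root, path)
      path == root || File.basename(path) =~ BLOCK_DIR_RE
    end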
index 5a3a536944daa5b8012bc0b2afbf8b6932862364..6b42dbc519ac933a0ddca0092fc1b14fb1b599d8 100644 (file)
@@ -424,3 +424,26 @@ func (s *UnixVolumeSuite) TestStats(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"FlockOps":2,.*`)
 }
+
+func (s *UnixVolumeSuite) TestSkipUnusedDirs(c *check.C) {
+       vol := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+
+       err := os.Mkdir(vol.UnixVolume.Root+"/aaa", 0777)
+       c.Assert(err, check.IsNil)
+       err = os.Mkdir(vol.UnixVolume.Root+"/.aaa", 0777) // EmptyTrash should not look here
+       c.Assert(err, check.IsNil)
+       deleteme := vol.UnixVolume.Root + "/aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+       err = ioutil.WriteFile(deleteme, []byte{1, 2, 3}, 0777)
+       c.Assert(err, check.IsNil)
+       skipme := vol.UnixVolume.Root + "/.aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+       err = ioutil.WriteFile(skipme, []byte{1, 2, 3}, 0777)
+       c.Assert(err, check.IsNil)
+       vol.EmptyTrash()
+
+       _, err = os.Stat(skipme)
+       c.Check(err, check.IsNil)
+
+       _, err = os.Stat(deleteme)
+       c.Check(err, check.NotNil)
+       c.Check(os.IsNotExist(err), check.Equals, true)
+}
index a928a71a2c0315a7666bb4f9ede138e3403d24cf..2de21edde6708faa3aff96ef32f2b61f191a9775 100644 (file)
@@ -178,13 +178,20 @@ func (v *MockVolume) Put(ctx context.Context, loc string, block []byte) error {
 }
 
 func (v *MockVolume) Touch(loc string) error {
+       return v.TouchWithDate(loc, time.Now())
+}
+
+func (v *MockVolume) TouchWithDate(loc string, t time.Time) error {
        v.gotCall("Touch")
        <-v.Gate
        if v.volume.ReadOnly {
                return MethodDisabledError
        }
+       if _, exists := v.Store[loc]; !exists {
+               return os.ErrNotExist
+       }
        if v.Touchable {
-               v.Timestamps[loc] = time.Now()
+               v.Timestamps[loc] = t
                return nil
        }
        return errors.New("Touch failed")