Merge branch '16007-permission-table-rb' refs #16007
authorPeter Amstutz <peter.amstutz@curii.com>
Tue, 16 Jun 2020 21:25:38 +0000 (17:25 -0400)
committerPeter Amstutz <peter.amstutz@curii.com>
Tue, 16 Jun 2020 21:25:38 +0000 (17:25 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

cmd/arvados-server/cmd.go
doc/_config.yml
doc/admin/recovering-deleted-collections.html.textile.liquid [new file with mode: 0644]
lib/recovercollection/cmd.go [moved from lib/undelete/cmd.go with 62% similarity]
lib/recovercollection/cmd_test.go [moved from lib/undelete/cmd_test.go with 76% similarity]
services/keepstore/unix_volume.go
services/keepstore/unix_volume_test.go

index 1b2de11accefe995511194c5941f25af3ccd35e4..ff99de75c41ad13f630d0902c2e695c6c17ad5c9 100644 (file)
@@ -15,7 +15,7 @@ import (
        "git.arvados.org/arvados.git/lib/crunchrun"
        "git.arvados.org/arvados.git/lib/dispatchcloud"
        "git.arvados.org/arvados.git/lib/install"
-       "git.arvados.org/arvados.git/lib/undelete"
+       "git.arvados.org/arvados.git/lib/recovercollection"
        "git.arvados.org/arvados.git/services/ws"
 )
 
@@ -25,17 +25,17 @@ var (
                "-version":  cmd.Version,
                "--version": cmd.Version,
 
-               "boot":            boot.Command,
-               "cloudtest":       cloudtest.Command,
-               "config-check":    config.CheckCommand,
-               "config-defaults": config.DumpDefaultsCommand,
-               "config-dump":     config.DumpCommand,
-               "controller":      controller.Command,
-               "crunch-run":      crunchrun.Command,
-               "dispatch-cloud":  dispatchcloud.Command,
-               "install":         install.Command,
-               "undelete":        undelete.Command,
-               "ws":              ws.Command,
+               "boot":               boot.Command,
+               "cloudtest":          cloudtest.Command,
+               "config-check":       config.CheckCommand,
+               "config-defaults":    config.DumpDefaultsCommand,
+               "config-dump":        config.DumpCommand,
+               "controller":         controller.Command,
+               "crunch-run":         crunchrun.Command,
+               "dispatch-cloud":     dispatchcloud.Command,
+               "install":            install.Command,
+               "recover-collection": recovercollection.Command,
+               "ws":                 ws.Command,
        })
 )
 
index 48fe1b53d49149139cf36e0b602e0f0fe4d2ab3e..3b59cbca45205983ba4b83429f06b914946a53dd 100644 (file)
@@ -174,6 +174,7 @@ navbar:
       - admin/logs-table-management.html.textile.liquid
       - admin/workbench2-vocabulary.html.textile.liquid
       - admin/storage-classes.html.textile.liquid
+      - admin/recovering-deleted-collections.html.textile.liquid
     - Cloud:
       - admin/spot-instances.html.textile.liquid
       - admin/cloudtest.html.textile.liquid
diff --git a/doc/admin/recovering-deleted-collections.html.textile.liquid b/doc/admin/recovering-deleted-collections.html.textile.liquid
new file mode 100644 (file)
index 0000000..59c576c
--- /dev/null
@@ -0,0 +1,37 @@
+---
+layout: default
+navsection: admin
+title: Recovering deleted collections
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+In some cases, it is possible to recover files that have been lost by modifying or deleting a collection.
+
+Possibility of recovery depends on many factors, including:
+* Whether the collection manifest is still available, e.g., in an audit log entry
+* Whether the data blocks are also referenced by other collections
+* Whether the data blocks have been unreferenced long enough to be marked for deletion/trash by keep-balance
+* Blob signature TTL, trash lifetime, trash check interval, and other config settings
+
+To attempt recovery of a previous version of a deleted/modified collection, use the @arvados-server recover-collection@ command. It should be run on one of your server nodes where the @arvados-server@ package is installed and the @/etc/arvados/config.yml@ file is up to date.
+
+Specify the collection you want to recover by passing either the UUID of an audit log entry, or a file containing the manifest.
+
+If recovery is successful, the @recover-collection@ program saves the recovered data in a new collection belonging to the system user, and prints the new collection's UUID on stdout.
+
+<pre>
+# arvados-server recover-collection 9tee4-57u5n-nb5awmk1pahac2t
+INFO[2020-06-05T19:52:29.557761245Z] loaded log entry                              logged_event_time="2020-06-05 16:48:01.438791 +0000 UTC" logged_event_type=update old_collection_uuid=9tee4-4zz18-1ex26g95epmgw5w src=9tee4-57u5n-nb5awmk1pahac2t
+INFO[2020-06-05T19:52:29.642145127Z] recovery succeeded                            UUID=9tee4-4zz18-5trfp4k4xxg97f1 src=9tee4-57u5n-nb5awmk1pahac2t
+9tee4-4zz18-5trfp4k4xxg97f1
+INFO[2020-06-05T19:52:29.644699436Z] exiting
+</pre>
+
+In this example, the original data has been restored and saved in a new collection with UUID @9tee4-4zz18-5trfp4k4xxg97f1@.
+
+For more options, run @arvados-server recover-collection -help@.
similarity index 62%
rename from lib/undelete/cmd.go
rename to lib/recovercollection/cmd.go
index 09adfae3c6ca8002fae8477666434038031b3e32..cea4607c98fe533fec8b37f839f1f641ce3fcccb 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package undelete
+package recovercollection
 
 import (
        "context"
@@ -42,7 +42,7 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
        flags.SetOutput(stderr)
        flags.Usage = func() {
                fmt.Fprintf(flags.Output(), `Usage:
-       %s [options ...] /path/to/manifest.txt [...]
+       %s [options ...] { /path/to/manifest.txt | log-or-collection-uuid } [...]
 
        This program recovers deleted collections. Recovery is
        possible when the collection's manifest is still available and
@@ -52,10 +52,25 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
        collections, or the blocks have been trashed but not yet
        deleted).
 
+       There are multiple ways to specify a collection to recover:
+
+        * Path to a local file containing a manifest with the desired
+         data
+
+       * UUID of an Arvados log entry, typically a "delete" or
+         "update" event, whose "old attributes" have a manifest with
+         the desired data
+
+       * UUID of an Arvados collection whose most recent log entry,
+          typically a "delete" or "update" event, has the desired
+          data in its "old attributes"
+
        For each provided collection manifest, once all data blocks
        are recovered/protected from garbage collection, a new
        collection is saved and its UUID is printed on stdout.
 
+       Restored collections will belong to the system (root) user.
+
        Exit status will be zero if recovery is successful, i.e., a
        collection is saved for each provided manifest.
 Options:
@@ -96,7 +111,7 @@ Options:
                return 1
        }
        client.AuthToken = cluster.SystemRootToken
-       und := undeleter{
+       rcvr := recoverer{
                client:  client,
                cluster: cluster,
                logger:  logger,
@@ -105,31 +120,81 @@ Options:
        exitcode := 0
        for _, src := range flags.Args() {
                logger := logger.WithField("src", src)
-               if len(src) == 27 && src[5:12] == "-57u5n-" {
-                       logger.Error("log entry lookup not implemented")
-                       exitcode = 1
-                       continue
-               } else {
-                       mtxt, err := ioutil.ReadFile(src)
+               var mtxt string
+               if !strings.Contains(src, "/") && len(src) == 27 && src[5] == '-' && src[11] == '-' {
+                       var filters []arvados.Filter
+                       if src[5:12] == "-57u5n-" {
+                               filters = []arvados.Filter{{"uuid", "=", src}}
+                       } else if src[5:12] == "-4zz18-" {
+                               filters = []arvados.Filter{{"object_uuid", "=", src}}
+                       } else {
+                               logger.Error("looks like a UUID but not a log or collection UUID (if it's really a file, prepend './')")
+                               exitcode = 1
+                               continue
+                       }
+                       var resp struct {
+                               Items []struct {
+                                       UUID       string    `json:"uuid"`
+                                       EventType  string    `json:"event_type"`
+                                       EventAt    time.Time `json:"event_at"`
+                                       ObjectUUID string    `json:"object_uuid"`
+                                       Properties struct {
+                                               OldAttributes struct {
+                                                       ManifestText string `json:"manifest_text"`
+                                               } `json:"old_attributes"`
+                                       } `json:"properties"`
+                               }
+                       }
+                       err = client.RequestAndDecode(&resp, "GET", "arvados/v1/logs", nil, arvados.ListOptions{
+                               Limit:   1,
+                               Order:   []string{"event_at desc"},
+                               Filters: filters,
+                       })
                        if err != nil {
-                               logger.WithError(err).Error("error loading manifest data")
+                               logger.WithError(err).Error("error looking up log entry")
+                               exitcode = 1
+                               continue
+                       } else if len(resp.Items) == 0 {
+                               logger.Error("log entry not found")
+                               exitcode = 1
+                               continue
+                       }
+                       logent := resp.Items[0]
+                       logger.WithFields(logrus.Fields{
+                               "uuid":                logent.UUID,
+                               "old_collection_uuid": logent.ObjectUUID,
+                               "logged_event_type":   logent.EventType,
+                               "logged_event_time":   logent.EventAt,
+                               "logged_object_uuid":  logent.ObjectUUID,
+                       }).Info("loaded log entry")
+                       mtxt = logent.Properties.OldAttributes.ManifestText
+                       if mtxt == "" {
+                               logger.Error("log entry properties.old_attributes.manifest_text missing or empty")
                                exitcode = 1
                                continue
                        }
-                       uuid, err := und.RecoverManifest(string(mtxt))
+               } else {
+                       buf, err := ioutil.ReadFile(src)
                        if err != nil {
-                               logger.WithError(err).Error("recovery failed")
+                               logger.WithError(err).Error("failed to load manifest data from file")
                                exitcode = 1
                                continue
                        }
-                       logger.WithField("UUID", uuid).Info("recovery succeeded")
-                       fmt.Fprintln(stdout, uuid)
+                       mtxt = string(buf)
+               }
+               uuid, err := rcvr.RecoverManifest(string(mtxt))
+               if err != nil {
+                       logger.WithError(err).Error("recovery failed")
+                       exitcode = 1
+                       continue
                }
+               logger.WithField("UUID", uuid).Info("recovery succeeded")
+               fmt.Fprintln(stdout, uuid)
        }
        return exitcode
 }
 
-type undeleter struct {
+type recoverer struct {
        client  *arvados.Client
        cluster *arvados.Cluster
        logger  logrus.FieldLogger
@@ -139,8 +204,8 @@ var errNotFound = errors.New("not found")
 
 // Finds the timestamp of the newest copy of blk on svc. Returns
 // errNotFound if blk is not on svc at all.
-func (und undeleter) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
-       found, err := svc.Index(und.client, blk)
+func (rcvr recoverer) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
+       found, err := svc.Index(rcvr.client, blk)
        if err != nil {
                logger.WithError(err).Warn("error getting index")
                return time.Time{}, err
@@ -170,17 +235,17 @@ var errTouchIneffective = errors.New("(BUG?) touch succeeded but had no effect -
 // decide to trash it, all before our recovered collection gets
 // saved. But if the block's timestamp is more recent than blobsigttl,
 // keepstore will refuse to trash it even if told to by keep-balance.
-func (und undeleter) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
-       if latest, err := und.newestMtime(logger, blk, svc); err != nil {
+func (rcvr recoverer) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
+       if latest, err := rcvr.newestMtime(logger, blk, svc); err != nil {
                return err
        } else if latest.Add(blobsigttl).After(blobsigexp) {
                return nil
        }
-       if err := svc.Touch(ctx, und.client, blk); err != nil {
+       if err := svc.Touch(ctx, rcvr.client, blk); err != nil {
                return fmt.Errorf("error updating timestamp: %s", err)
        }
        logger.Debug("updated timestamp")
-       if latest, err := und.newestMtime(logger, blk, svc); err == errNotFound {
+       if latest, err := rcvr.newestMtime(logger, blk, svc); err == errNotFound {
                return fmt.Errorf("(BUG?) touch succeeded, but then block did not appear in index")
        } else if err != nil {
                return err
@@ -194,7 +259,7 @@ func (und undeleter) ensureSafe(ctx context.Context, logger logrus.FieldLogger,
 // Untrash and update GC timestamps (as needed) on blocks referenced
 // by the given manifest, save a new collection and return the new
 // collection's UUID.
-func (und undeleter) RecoverManifest(mtxt string) (string, error) {
+func (rcvr recoverer) RecoverManifest(mtxt string) (string, error) {
        ctx, cancel := context.WithCancel(context.Background())
        defer cancel()
 
@@ -210,9 +275,9 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        go close(todo)
 
        var services []arvados.KeepService
-       err = und.client.EachKeepService(func(svc arvados.KeepService) error {
+       err = rcvr.client.EachKeepService(func(svc arvados.KeepService) error {
                if svc.ServiceType == "proxy" {
-                       und.logger.WithField("service", svc).Debug("ignore proxy service")
+                       rcvr.logger.WithField("service", svc).Debug("ignore proxy service")
                } else {
                        services = append(services, svc)
                }
@@ -221,7 +286,7 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        if err != nil {
                return "", fmt.Errorf("error getting list of keep services: %s", err)
        }
-       und.logger.WithField("services", services).Debug("got list of services")
+       rcvr.logger.WithField("services", services).Debug("got list of services")
 
        // blobsigexp is our deadline for saving the rescued
        // collection. This must be less than BlobSigningTTL
@@ -235,9 +300,9 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        // would have lived long enough anyway if left alone.
        // BlobSigningTTL/2 (typically around 1 week) is much longer
        // than we need to recover even a very large collection.
-       blobsigttl := und.cluster.Collections.BlobSigningTTL.Duration()
+       blobsigttl := rcvr.cluster.Collections.BlobSigningTTL.Duration()
        blobsigexp := time.Now().Add(blobsigttl / 2)
-       und.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
+       rcvr.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
 
        // We'll start a number of threads, each working on
        // checking/recovering one block at a time. The threads
@@ -255,18 +320,18 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
                nextblk:
                        for idx := range todo {
                                blk := strings.SplitN(string(blks[idx]), "+", 2)[0]
-                               logger := und.logger.WithField("block", blk)
+                               logger := rcvr.logger.WithField("block", blk)
                                for _, untrashing := range []bool{false, true} {
                                        for _, svc := range services {
                                                logger := logger.WithField("service", fmt.Sprintf("%s:%d", svc.ServiceHost, svc.ServicePort))
                                                if untrashing {
-                                                       if err := svc.Untrash(ctx, und.client, blk); err != nil {
+                                                       if err := svc.Untrash(ctx, rcvr.client, blk); err != nil {
                                                                logger.WithError(err).Debug("untrash failed")
                                                                continue
                                                        }
                                                        logger.Info("untrashed")
                                                }
-                                               err := und.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
+                                               err := rcvr.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
                                                if err == errNotFound {
                                                        logger.Debug(err)
                                                } else if err != nil {
@@ -293,17 +358,17 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        }
        if havenot > 0 {
                if have > 0 {
-                       und.logger.Warn("partial recovery is not implemented")
+                       rcvr.logger.Warn("partial recovery is not implemented")
                }
                return "", fmt.Errorf("unable to recover %d of %d blocks", havenot, have+havenot)
        }
 
-       if und.cluster.Collections.BlobSigning {
-               key := []byte(und.cluster.Collections.BlobSigningKey)
-               coll.ManifestText = arvados.SignManifest(coll.ManifestText, und.client.AuthToken, blobsigexp, blobsigttl, key)
+       if rcvr.cluster.Collections.BlobSigning {
+               key := []byte(rcvr.cluster.Collections.BlobSigningKey)
+               coll.ManifestText = arvados.SignManifest(coll.ManifestText, rcvr.client.AuthToken, blobsigexp, blobsigttl, key)
        }
-       und.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
-       err = und.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
+       rcvr.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
+       err = rcvr.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
                "collection": map[string]interface{}{
                        "manifest_text": coll.ManifestText,
                },
@@ -311,6 +376,6 @@ func (und undeleter) RecoverManifest(mtxt string) (string, error) {
        if err != nil {
                return "", fmt.Errorf("error saving new collection: %s", err)
        }
-       und.logger.WithField("UUID", coll.UUID).Debug("created new collection")
+       rcvr.logger.WithField("UUID", coll.UUID).Debug("created new collection")
        return coll.UUID, nil
 }
similarity index 76%
rename from lib/undelete/cmd_test.go
rename to lib/recovercollection/cmd_test.go
index a5edaf90b3190d305b09a7f019e34b92992ab609..57c2c64cdab01289911043c5767ed92edbcd8c36 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package undelete
+package recovercollection
 
 import (
        "bytes"
@@ -36,7 +36,7 @@ func (*Suite) TestUnrecoverableBlock(c *check.C) {
        mfile := tmp + "/manifest"
        ioutil.WriteFile(mfile, []byte(". aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+410 0:410:Gone\n"), 0777)
        var stdout, stderr bytes.Buffer
-       exitcode := Command.RunCommand("undelete.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
+       exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
        c.Check(exitcode, check.Equals, 1)
        c.Check(stdout.String(), check.Equals, "")
        c.Log(stderr.String())
@@ -93,7 +93,7 @@ func (*Suite) TestUntrashAndTouchBlock(c *check.C) {
        }
 
        var stdout, stderr bytes.Buffer
-       exitcode := Command.RunCommand("undelete.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
+       exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", mfile}, &bytes.Buffer{}, &stdout, &stderr)
        c.Check(exitcode, check.Equals, 0)
        c.Check(stdout.String(), check.Matches, `zzzzz-4zz18-.{15}\n`)
        c.Log(stderr.String())
@@ -115,3 +115,22 @@ func (*Suite) TestUntrashAndTouchBlock(c *check.C) {
        }
        c.Check(found, check.Equals, true)
 }
+
+func (*Suite) TestUnusableManifestSourceArg(c *check.C) {
+       for _, trial := range []struct {
+               srcArg    string
+               errRegexp string
+       }{
+               {"zzzzz-4zz18-aaaaaaaaaaaaaaa", `(?ms).*msg="log entry not found".*`},
+               {"zzzzz-57u5n-aaaaaaaaaaaaaaa", `(?ms).*msg="log entry not found.*`},
+               {"zzzzz-57u5n-containerlog006", `(?ms).*msg="log entry properties\.old_attributes\.manifest_text missing or empty".*`},
+               {"zzzzz-j7d0g-aaaaaaaaaaaaaaa", `(?ms).*msg="looks like a UUID but not a log or collection UUID.*`},
+       } {
+               var stdout, stderr bytes.Buffer
+               exitcode := Command.RunCommand("recovercollection.test", []string{"-log-level=debug", trial.srcArg}, &bytes.Buffer{}, &stdout, &stderr)
+               c.Check(exitcode, check.Equals, 1)
+               c.Check(stdout.String(), check.Equals, "")
+               c.Log(stderr.String())
+               c.Check(stderr.String(), check.Matches, trial.errRegexp)
+       }
+}
index 5026e2d32558e085886ba119cf0b664bfbc58473..1706473cc892c43cbd5ad27751c49f43cbebc075 100644 (file)
@@ -699,10 +699,20 @@ func (v *UnixVolume) EmptyTrash() {
        err := filepath.Walk(v.Root, func(path string, info os.FileInfo, err error) error {
                if err != nil {
                        v.logger.WithError(err).Errorf("EmptyTrash: filepath.Walk(%q) failed", path)
+                       // Don't give up -- keep walking other
+                       // files/dirs
                        return nil
+               } else if !info.Mode().IsDir() {
+                       todo <- dirent{path, info}
+                       return nil
+               } else if path == v.Root || blockDirRe.MatchString(info.Name()) {
+                       // Descend into a directory that we might have
+                       // put trash in.
+                       return nil
+               } else {
+                       // Don't descend into other dirs.
+                       return filepath.SkipDir
                }
-               todo <- dirent{path, info}
-               return nil
        })
        close(todo)
        wg.Wait()
index 5a3a536944daa5b8012bc0b2afbf8b6932862364..6b42dbc519ac933a0ddca0092fc1b14fb1b599d8 100644 (file)
@@ -424,3 +424,26 @@ func (s *UnixVolumeSuite) TestStats(c *check.C) {
        c.Check(err, check.IsNil)
        c.Check(stats(), check.Matches, `.*"FlockOps":2,.*`)
 }
+
+func (s *UnixVolumeSuite) TestSkipUnusedDirs(c *check.C) {
+       vol := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
+
+       err := os.Mkdir(vol.UnixVolume.Root+"/aaa", 0777)
+       c.Assert(err, check.IsNil)
+       err = os.Mkdir(vol.UnixVolume.Root+"/.aaa", 0777) // EmptyTrash should not look here
+       c.Assert(err, check.IsNil)
+       deleteme := vol.UnixVolume.Root + "/aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+       err = ioutil.WriteFile(deleteme, []byte{1, 2, 3}, 0777)
+       c.Assert(err, check.IsNil)
+       skipme := vol.UnixVolume.Root + "/.aaa/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.trash.1"
+       err = ioutil.WriteFile(skipme, []byte{1, 2, 3}, 0777)
+       c.Assert(err, check.IsNil)
+       vol.EmptyTrash()
+
+       _, err = os.Stat(skipme)
+       c.Check(err, check.IsNil)
+
+       _, err = os.Stat(deleteme)
+       c.Check(err, check.NotNil)
+       c.Check(os.IsNotExist(err), check.Equals, true)
+}