"git.arvados.org/arvados.git/lib/crunchrun"
"git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/lib/install"
- "git.arvados.org/arvados.git/lib/undelete"
+ "git.arvados.org/arvados.git/lib/recovercollection"
"git.arvados.org/arvados.git/services/ws"
)
"-version": cmd.Version,
"--version": cmd.Version,
- "boot": boot.Command,
- "cloudtest": cloudtest.Command,
- "config-check": config.CheckCommand,
- "config-defaults": config.DumpDefaultsCommand,
- "config-dump": config.DumpCommand,
- "controller": controller.Command,
- "crunch-run": crunchrun.Command,
- "dispatch-cloud": dispatchcloud.Command,
- "install": install.Command,
- "undelete": undelete.Command,
- "ws": ws.Command,
+ "boot": boot.Command,
+ "cloudtest": cloudtest.Command,
+ "config-check": config.CheckCommand,
+ "config-defaults": config.DumpDefaultsCommand,
+ "config-dump": config.DumpCommand,
+ "controller": controller.Command,
+ "crunch-run": crunchrun.Command,
+ "dispatch-cloud": dispatchcloud.Command,
+ "install": install.Command,
+ "recover-collection": recovercollection.Command,
+ "ws": ws.Command,
})
)
---
layout: default
navsection: admin
-title: Undeleting collections
+title: Recovering deleted collections
...
{% comment %}
* Whether the data blocks have been unreferenced long enough to be marked for deletion/trash by keep-balance
* Blob signature TTL, trash lifetime, trash check interval, and other config settings
-To attempt recovery of a previous version of a deleted/modified collection, use the @arvados-server undelete@ command. It should be run on one of your server nodes where the @arvados-server@ package is installed and the @/etc/arvados/config.yml@ file is up to date.
+To attempt recovery of a previous version of a deleted/modified collection, use the @arvados-server recover-collection@ command. It should be run on one of your server nodes where the @arvados-server@ package is installed and the @/etc/arvados/config.yml@ file is up to date.
Specify the collection you want to recover by passing either the UUID of an audit log entry, or a file containing the manifest.
-If recovery is successful, the undelete program saves the recovered data a new collection belonging to the system user, and print the new collection's UUID on stdout.
+If recovery is successful, the @recover-collection@ program saves the recovered data as a new collection belonging to the system user, and prints the new collection's UUID on stdout.
<pre>
-# arvados-server undelete 9tee4-57u5n-nb5awmk1pahac2t
+# arvados-server recover-collection 9tee4-57u5n-nb5awmk1pahac2t
INFO[2020-06-05T19:52:29.557761245Z] loaded log entry logged_event_time="2020-06-05 16:48:01.438791 +0000 UTC" logged_event_type=update old_collection_uuid=9tee4-4zz18-1ex26g95epmgw5w src=9tee4-57u5n-nb5awmk1pahac2t
INFO[2020-06-05T19:52:29.642145127Z] recovery succeeded UUID=9tee4-4zz18-5trfp4k4xxg97f1 src=9tee4-57u5n-nb5awmk1pahac2t
9tee4-4zz18-5trfp4k4xxg97f1
In this example, the original data has been restored and saved in a new collection with UUID @9tee4-4zz18-5trfp4k4xxg97f1@.
-For more options, run @arvados-server undelete -help@.
+For more options, run @arvados-server recover-collection -help@.
//
// SPDX-License-Identifier: AGPL-3.0
-package undelete
+package recovercollection
import (
"context"
return 1
}
client.AuthToken = cluster.SystemRootToken
- und := undeleter{
+ rcvr := recoverer{
client: client,
cluster: cluster,
logger: logger,
}
mtxt = string(buf)
}
- uuid, err := und.RecoverManifest(string(mtxt))
+ uuid, err := rcvr.RecoverManifest(string(mtxt))
if err != nil {
logger.WithError(err).Error("recovery failed")
exitcode = 1
return exitcode
}
-type undeleter struct {
+type recoverer struct {
client *arvados.Client
cluster *arvados.Cluster
logger logrus.FieldLogger
// Finds the timestamp of the newest copy of blk on svc. Returns
// errNotFound if blk is not on svc at all.
-func (und undeleter) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
- found, err := svc.Index(und.client, blk)
+func (rcvr recoverer) newestMtime(logger logrus.FieldLogger, blk string, svc arvados.KeepService) (time.Time, error) {
+ found, err := svc.Index(rcvr.client, blk)
if err != nil {
logger.WithError(err).Warn("error getting index")
return time.Time{}, err
// decide to trash it, all before our recovered collection gets
// saved. But if the block's timestamp is more recent than blobsigttl,
// keepstore will refuse to trash it even if told to by keep-balance.
-func (und undeleter) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
- if latest, err := und.newestMtime(logger, blk, svc); err != nil {
+func (rcvr recoverer) ensureSafe(ctx context.Context, logger logrus.FieldLogger, blk string, svc arvados.KeepService, blobsigttl time.Duration, blobsigexp time.Time) error {
+ if latest, err := rcvr.newestMtime(logger, blk, svc); err != nil {
return err
} else if latest.Add(blobsigttl).After(blobsigexp) {
return nil
}
- if err := svc.Touch(ctx, und.client, blk); err != nil {
+ if err := svc.Touch(ctx, rcvr.client, blk); err != nil {
return fmt.Errorf("error updating timestamp: %s", err)
}
logger.Debug("updated timestamp")
- if latest, err := und.newestMtime(logger, blk, svc); err == errNotFound {
+ if latest, err := rcvr.newestMtime(logger, blk, svc); err == errNotFound {
return fmt.Errorf("(BUG?) touch succeeded, but then block did not appear in index")
} else if err != nil {
return err
// Untrash and update GC timestamps (as needed) on blocks referenced
// by the given manifest, save a new collection and return the new
// collection's UUID.
-func (und undeleter) RecoverManifest(mtxt string) (string, error) {
+func (rcvr recoverer) RecoverManifest(mtxt string) (string, error) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
go close(todo)
var services []arvados.KeepService
- err = und.client.EachKeepService(func(svc arvados.KeepService) error {
+ err = rcvr.client.EachKeepService(func(svc arvados.KeepService) error {
if svc.ServiceType == "proxy" {
- und.logger.WithField("service", svc).Debug("ignore proxy service")
+ rcvr.logger.WithField("service", svc).Debug("ignore proxy service")
} else {
services = append(services, svc)
}
if err != nil {
return "", fmt.Errorf("error getting list of keep services: %s", err)
}
- und.logger.WithField("services", services).Debug("got list of services")
+ rcvr.logger.WithField("services", services).Debug("got list of services")
// blobsigexp is our deadline for saving the rescued
// collection. This must be less than BlobSigningTTL
// would have lived long enough anyway if left alone.
// BlobSigningTTL/2 (typically around 1 week) is much longer
// than we need to recover even a very large collection.
- blobsigttl := und.cluster.Collections.BlobSigningTTL.Duration()
+ blobsigttl := rcvr.cluster.Collections.BlobSigningTTL.Duration()
blobsigexp := time.Now().Add(blobsigttl / 2)
- und.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
+ rcvr.logger.WithField("blobsigexp", blobsigexp).Debug("chose save deadline")
// We'll start a number of threads, each working on
// checking/recovering one block at a time. The threads
nextblk:
for idx := range todo {
blk := strings.SplitN(string(blks[idx]), "+", 2)[0]
- logger := und.logger.WithField("block", blk)
+ logger := rcvr.logger.WithField("block", blk)
for _, untrashing := range []bool{false, true} {
for _, svc := range services {
logger := logger.WithField("service", fmt.Sprintf("%s:%d", svc.ServiceHost, svc.ServicePort))
if untrashing {
- if err := svc.Untrash(ctx, und.client, blk); err != nil {
+ if err := svc.Untrash(ctx, rcvr.client, blk); err != nil {
logger.WithError(err).Debug("untrash failed")
continue
}
logger.Info("untrashed")
}
- err := und.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
+ err := rcvr.ensureSafe(ctx, logger, blk, svc, blobsigttl, blobsigexp)
if err == errNotFound {
logger.Debug(err)
} else if err != nil {
}
if havenot > 0 {
if have > 0 {
- und.logger.Warn("partial recovery is not implemented")
+ rcvr.logger.Warn("partial recovery is not implemented")
}
return "", fmt.Errorf("unable to recover %d of %d blocks", havenot, have+havenot)
}
- if und.cluster.Collections.BlobSigning {
- key := []byte(und.cluster.Collections.BlobSigningKey)
- coll.ManifestText = arvados.SignManifest(coll.ManifestText, und.client.AuthToken, blobsigexp, blobsigttl, key)
+ if rcvr.cluster.Collections.BlobSigning {
+ key := []byte(rcvr.cluster.Collections.BlobSigningKey)
+ coll.ManifestText = arvados.SignManifest(coll.ManifestText, rcvr.client.AuthToken, blobsigexp, blobsigttl, key)
}
- und.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
- err = und.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
+ rcvr.logger.WithField("manifest", coll.ManifestText).Debug("updated blob signatures in manifest")
+ err = rcvr.client.RequestAndDecodeContext(ctx, &coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
"collection": map[string]interface{}{
"manifest_text": coll.ManifestText,
},
if err != nil {
return "", fmt.Errorf("error saving new collection: %s", err)
}
- und.logger.WithField("UUID", coll.UUID).Debug("created new collection")
+ rcvr.logger.WithField("UUID", coll.UUID).Debug("created new collection")
return coll.UUID, nil
}