From: Tom Clegg Date: Thu, 10 Mar 2022 21:30:27 +0000 (-0500) Subject: 18600: Move "splices" to "update_files" update/create option. X-Git-Tag: 2.4.0~47^2~4 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/55723c549ef42d76584a9791036a12c68ffb95eb 18600: Move "splices" to "update_files" update/create option. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid index bea19bb75a..ea67937e80 100644 --- a/doc/api/methods/collections.html.textile.liquid +++ b/doc/api/methods/collections.html.textile.liquid @@ -71,9 +71,10 @@ Arguments: table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location |_. Example | -|collection|object||query|| +|collection|object||body|| +|replace_files|object|Initialize files and directories using content from other collections|body|| -The new collection's content can be initialized by providing a @manifest_text@ or @splices@ key in the provided @collection@ object (see "splices":#splices below). +The new collection's content can be initialized by providing a @manifest_text@ key in the provided @collection@ object, or by using the @replace_files@ option (see "replace_files":#replace_files below). h3. delete @@ -117,9 +118,10 @@ Arguments: table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location |_. Example | {background:#ccffcc}.|uuid|string|The UUID of the Collection in question.|path|| -|collection|object||query|| +|collection|object||body|| +|replace_files|object|Delete and replace files and directories using content from other collections|body|| -The collection's content can be updated by providing a @manifest_text@ or @splices@ key in the provided @collection@ object (see "splices":#splices below). +The collection's content can be updated by providing a @manifest_text@ key in the provided @collection@ object, or by using the @replace_files@ option (see "replace_files":#replace_files below). h3. untrash @@ -165,11 +167,11 @@ table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location |_. Example | {background:#ccffcc}.|uuid|string|The UUID of the Collection to get usage.|path|| -h2(#splices). Using "splices" to create/update collections +h2(#replace_files). Using "replace_files" to create/update collections -The @splices@ attribute can be used with the @create@ and @update@ APIs to efficiently copy individual files and directory trees from other collections, and copy/rename/delete items within an existing collection, without transferring any file data. +The @replace_files@ option can be used with the @create@ and @update@ APIs to efficiently copy individual files and directory trees from other collections, and copy/rename/delete items within an existing collection, without transferring any file data. -@splices@ keys indicate target paths in the new collection, and values specify sources that should be copied to the target paths. +@replace_files@ keys indicate target paths in the new collection, and values specify sources that should be copied to the target paths. * Each target path must be an absolute canonical path beginning with @/@. It must not contain @.@ or @..@ components, consecutive @/@ characters, or a trailing @/@ after the final component. * Each source must be either an empty string (signifying that the target path is to be deleted), or @PDH/path@ where @PDH@ is the portable data hash of a collection on the cluster and @/path@ is a file or directory in that collection. * In an @update@ request, sources may reference the current portable data hash of the collection being updated. @@ -177,53 +179,43 @@ The @splices@ attribute can be used with the @create@ and @update@ APIs to effic Example: delete @foo.txt@ from a collection
-"collection": {
-  "splices": {
-    "/foo.txt": ""
-  }
+"replace_files": {
+  "/foo.txt": ""
 }
 
Example: rename @foo.txt@ to @bar.txt@ in a collection with portable data hash @fa7aeb5140e2848d39b416daeef4ffc5+45@
-"collection": {
-  "splices": {
-    "/foo.txt": "",
-    "/bar.txt": "fa7aeb5140e2848d39b416daeef4ffc5+45/foo.txt"
-  }
+"replace_files": {
+  "/foo.txt": "",
+  "/bar.txt": "fa7aeb5140e2848d39b416daeef4ffc5+45/foo.txt"
 }
 
Example: delete current contents, then add content from multiple collections
-"collection": {
-  "splices": {
-    "/": "",
-    "/copy of collection 1": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/",
-    "/copy of collection 2": "ea10d51bcf88862dbcc36eb292017dfd+45/"
-  }
+"replace_files": {
+  "/": "",
+  "/copy of collection 1": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/",
+  "/copy of collection 2": "ea10d51bcf88862dbcc36eb292017dfd+45/"
 }
 
Example: replace entire collection with a copy of a subdirectory from another collection
-"collection": {
-  "splices": {
-    "/": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/subdir"
-  }
+"replace_files": {
+  "/": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/subdir"
 }
 
A target path with a non-empty source cannot be the ancestor of another target path in the same request. For example, the following request is invalid:
-"collection": {
-  "splices": {
-    "/foo": "fa7aeb5140e2848d39b416daeef4ffc5+45/",
-    "/foo/this_will_return_an_error": ""
-  }
+"replace_files": {
+  "/foo": "fa7aeb5140e2848d39b416daeef4ffc5+45/",
+  "/foo/this_will_return_an_error": ""
 }
 
diff --git a/lib/controller/localdb/collection.go b/lib/controller/localdb/collection.go index 2283b2fcb5..868e466e9e 100644 --- a/lib/controller/localdb/collection.go +++ b/lib/controller/localdb/collection.go @@ -68,7 +68,7 @@ func (conn *Conn) CollectionCreate(ctx context.Context, opts arvados.CreateOptio // them. opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) } - if err := conn.applySplices(ctx, "", opts.Attrs); err != nil { + if opts.Attrs, err = conn.applyReplaceFilesOption(ctx, "", opts.Attrs, opts.ReplaceFiles); err != nil { return arvados.Collection{}, err } resp, err := conn.railsProxy.CollectionCreate(ctx, opts) @@ -92,7 +92,7 @@ func (conn *Conn) CollectionUpdate(ctx context.Context, opts arvados.UpdateOptio // them. opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) } - if err := conn.applySplices(ctx, opts.UUID, opts.Attrs); err != nil { + if opts.Attrs, err = conn.applyReplaceFilesOption(ctx, opts.UUID, opts.Attrs, opts.ReplaceFiles); err != nil { return arvados.Collection{}, err } resp, err := conn.railsProxy.CollectionUpdate(ctx, opts) @@ -122,64 +122,42 @@ func (conn *Conn) signCollection(ctx context.Context, coll *arvados.Collection) coll.ManifestText = arvados.SignManifest(coll.ManifestText, token, exp, ttl, []byte(conn.cluster.Collections.BlobSigningKey)) } -// If attrs["splices"] is present, populate attrs["manifest_text"] by +// If replaceFiles is non-empty, populate attrs["manifest_text"] by // starting with the content of fromUUID (or an empty collection if -// fromUUID is empty) and applying the specified splice operations. -func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[string]interface{}) error { - var splices map[string]string - - // Validate the incoming attrs, and return early if the - // request doesn't ask for any splices. - if sp, ok := attrs["splices"]; !ok { - return nil - } else { - switch sp := sp.(type) { - default: - return httpserver.Errorf(http.StatusBadRequest, "invalid type %T for splices parameter", sp) - case nil: - return nil - case map[string]string: - splices = sp - case map[string]interface{}: - splices = make(map[string]string, len(sp)) - for dst, src := range sp { - if src, ok := src.(string); ok { - splices[dst] = src - } else { - return httpserver.Errorf(http.StatusBadRequest, "invalid source type for splice target %q: %v", dst, src) - } - } - } - if len(splices) == 0 { - return nil - } else if mtxt, ok := attrs["manifest_text"].(string); ok && len(mtxt) > 0 { - return httpserver.Errorf(http.StatusBadRequest, "ambiguous request: both 'splices' and 'manifest_text' values provided") - } +// fromUUID is empty) and applying the specified file/directory +// replacements. +// +// Return value is the (possibly modified) attrs map. +func (conn *Conn) applyReplaceFilesOption(ctx context.Context, fromUUID string, attrs map[string]interface{}, replaceFiles map[string]string) (map[string]interface{}, error) { + if len(replaceFiles) == 0 { + return attrs, nil + } else if mtxt, ok := attrs["manifest_text"].(string); ok && len(mtxt) > 0 { + return nil, httpserver.Errorf(http.StatusBadRequest, "ambiguous request: both 'replace_files' and attrs['manifest_text'] values provided") } // Load the current collection (if any) and set up an // in-memory filesystem. var dst arvados.Collection - if _, rootsplice := splices["/"]; !rootsplice && fromUUID != "" { + if _, replacingRoot := replaceFiles["/"]; !replacingRoot && fromUUID != "" { src, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: fromUUID}) if err != nil { - return err + return nil, err } dst = src } dstfs, err := dst.FileSystem(&arvados.StubClient{}, &arvados.StubClient{}) if err != nil { - return err + return nil, err } - // Sort splices by source collection to avoid redundant + // Sort replacements by source collection to avoid redundant // reloads when a source collection is used more than // once. Note empty sources (which mean "delete target path") // sort first. - dstTodo := make([]string, 0, len(splices)) + dstTodo := make([]string, 0, len(replaceFiles)) { - srcid := make(map[string]string, len(splices)) - for dst, src := range splices { + srcid := make(map[string]string, len(replaceFiles)) + for dst, src := range replaceFiles { dstTodo = append(dstTodo, dst) if i := strings.IndexRune(src, '/'); i > 0 { srcid[dst] = src[:i] @@ -190,7 +168,7 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s }) } - // Reject attempt to splice a node as well as its descendant + // Reject attempt to replace a node as well as its descendant // (e.g., a/ and a/b/), which is unsupported, except where the // source for a/ is empty (i.e., delete). for _, dst := range dstTodo { @@ -201,7 +179,7 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s strings.Contains(dst, "/./") || strings.Contains(dst, "/../") || !strings.HasPrefix(dst, "/")) { - return httpserver.Errorf(http.StatusBadRequest, "invalid splice target: %q", dst) + return nil, httpserver.Errorf(http.StatusBadRequest, "invalid replace_files target: %q", dst) } for i := 0; i < len(dst)-1; i++ { if dst[i] != '/' { @@ -211,17 +189,17 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s if outerdst == "" { outerdst = "/" } - if outersrc := splices[outerdst]; outersrc != "" { - return httpserver.Errorf(http.StatusBadRequest, "cannot splice at target %q with non-empty splice at %q", dst, outerdst) + if outersrc := replaceFiles[outerdst]; outersrc != "" { + return nil, httpserver.Errorf(http.StatusBadRequest, "replace_files: cannot operate on target %q inside non-empty target %q", dst, outerdst) } } } var srcidloaded string var srcfs arvados.FileSystem - // Apply the requested splices. + // Apply the requested replacements. for _, dst := range dstTodo { - src := splices[dst] + src := replaceFiles[dst] if src == "" { if dst == "/" { // In this case we started with a @@ -231,14 +209,14 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s } err := dstfs.RemoveAll(dst) if err != nil { - return fmt.Errorf("RemoveAll(%s): %w", dst, err) + return nil, fmt.Errorf("RemoveAll(%s): %w", dst, err) } continue } srcspec := strings.SplitN(src, "/", 2) srcid, srcpath := srcspec[0], "/" if !arvadosclient.PDHMatch(srcid) { - return httpserver.Errorf(http.StatusBadRequest, "invalid source %q for splices[%q]: must be \"\" or \"PDH\" or \"PDH/path\"", src, dst) + return nil, httpserver.Errorf(http.StatusBadRequest, "invalid source %q for replace_files[%q]: must be \"\" or \"PDH\" or \"PDH/path\"", src, dst) } if len(srcspec) == 2 && srcspec[1] != "" { srcpath = srcspec[1] @@ -247,20 +225,20 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s srcfs = nil srccoll, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: srcid}) if err != nil { - return err + return nil, err } // We use StubClient here because we don't // want srcfs to read/write any file data or // sync collection state to/from the database. srcfs, err = srccoll.FileSystem(&arvados.StubClient{}, &arvados.StubClient{}) if err != nil { - return err + return nil, err } srcidloaded = srcid } snap, err := arvados.Snapshot(srcfs, srcpath) if err != nil { - return httpserver.Errorf(http.StatusBadRequest, "error getting snapshot of %q from %q: %w", srcpath, srcid, err) + return nil, httpserver.Errorf(http.StatusBadRequest, "error getting snapshot of %q from %q: %w", srcpath, srcid, err) } // Create intermediate dirs, in case dst is // "newdir1/newdir2/dst". @@ -268,20 +246,22 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s if dst[i] == '/' { err = dstfs.Mkdir(dst[:i], 0777) if err != nil && !os.IsExist(err) { - return httpserver.Errorf(http.StatusBadRequest, "error creating parent dirs for %q: %w", dst, err) + return nil, httpserver.Errorf(http.StatusBadRequest, "error creating parent dirs for %q: %w", dst, err) } } } err = arvados.Splice(dstfs, dst, snap) if err != nil { - return fmt.Errorf("error splicing snapshot onto path %q: %w", dst, err) + return nil, fmt.Errorf("error splicing snapshot onto path %q: %w", dst, err) } } mtxt, err := dstfs.MarshalManifest(".") if err != nil { - return err + return nil, err + } + if attrs == nil { + attrs = make(map[string]interface{}, 1) } - delete(attrs, "splices") attrs["manifest_text"] = mtxt - return nil + return attrs, nil } diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go index e71ae35c21..dac8b769fe 100644 --- a/lib/controller/localdb/collection_test.go +++ b/lib/controller/localdb/collection_test.go @@ -125,7 +125,7 @@ func (s *CollectionSuite) TestCollectionCreateAndUpdateWithProperties(c *check.C } } -func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) { +func (s *CollectionSuite) TestCollectionReplaceFiles(c *check.C) { ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.AdminToken}}) foo, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{ Attrs: map[string]interface{}{ @@ -153,14 +153,14 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) { // Create using content from existing collections dst, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{ + ReplaceFiles: map[string]string{ + "/f": foo.PortableDataHash + "/foo.txt", + "/b": foobarbaz.PortableDataHash + "/foo/bar", + "/q": wazqux.PortableDataHash + "/", + "/w": wazqux.PortableDataHash + "/waz", + }, Attrs: map[string]interface{}{ "owner_uuid": arvadostest.ActiveUserUUID, - "splices": map[string]string{ - "/f": foo.PortableDataHash + "/foo.txt", - "/b": foobarbaz.PortableDataHash + "/foo/bar", - "/q": wazqux.PortableDataHash + "/", - "/w": wazqux.PortableDataHash + "/waz", - }, }}) c.Assert(err, check.IsNil) s.expectFiles(c, dst, "f", "b/baz.txt", "q/waz/qux.txt", "w/qux.txt") @@ -168,11 +168,9 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) { // Delete a file and a directory dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ UUID: dst.UUID, - Attrs: map[string]interface{}{ - "splices": map[string]string{ - "/f": "", - "/q/waz": "", - }, + ReplaceFiles: map[string]string{ + "/f": "", + "/q/waz": "", }}) c.Assert(err, check.IsNil) s.expectFiles(c, dst, "b/baz.txt", "q/", "w/qux.txt") @@ -180,15 +178,12 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) { // Move and copy content within collection dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ UUID: dst.UUID, - Attrs: map[string]interface{}{ - "splices": map[string]string{ - // Note splicing content to - // /b/corge.txt but removing - // everything else from /b - "/b": "", - "/b/corge.txt": dst.PortableDataHash + "/b/baz.txt", - "/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt", - }, + ReplaceFiles: map[string]string{ + // Note splicing content to /b/corge.txt but + // removing everything else from /b + "/b": "", + "/b/corge.txt": dst.PortableDataHash + "/b/baz.txt", + "/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt", }}) c.Assert(err, check.IsNil) s.expectFiles(c, dst, "b/corge.txt", "q/", "w/qux.txt", "quux/corge.txt") @@ -196,31 +191,24 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) { // Remove everything except one file dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ UUID: dst.UUID, - Attrs: map[string]interface{}{ - "splices": map[string]string{ - "/": "", - "/b/corge.txt": dst.PortableDataHash + "/b/corge.txt", - }, + ReplaceFiles: map[string]string{ + "/": "", + "/b/corge.txt": dst.PortableDataHash + "/b/corge.txt", }}) c.Assert(err, check.IsNil) s.expectFiles(c, dst, "b/corge.txt") // Copy entire collection to root dstcopy, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{ - Attrs: map[string]interface{}{ - // Note map[string]interface{} here, which is - // how lib/controller/router requests will - // look. - "splices": map[string]interface{}{ - "/": dst.PortableDataHash, - }, + ReplaceFiles: map[string]string{ + "/": dst.PortableDataHash, }}) c.Check(err, check.IsNil) c.Check(dstcopy.PortableDataHash, check.Equals, dst.PortableDataHash) s.expectFiles(c, dstcopy, "b/corge.txt") // Check invalid targets, sources, and combinations - for _, splices := range []map[string]string{ + for _, badrepl := range []map[string]string{ { "/foo/nope": dst.PortableDataHash + "/b", "/foo": dst.PortableDataHash + "/b", @@ -250,40 +238,22 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) { {"/bad": dst.UUID + "/b"}, } { _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ - UUID: dst.UUID, - Attrs: map[string]interface{}{ - "splices": splices, - }}) - c.Logf("splices %#v\n... got err: %s", splices, err) + UUID: dst.UUID, + ReplaceFiles: badrepl, + }) + c.Logf("badrepl %#v\n... got err: %s", badrepl, err) c.Check(err, check.NotNil) } - // Check "splices" value that isn't even the right type - for _, splices := range []interface{}{ - map[string]int{"foo": 1}, - map[int]string{1: "foo"}, - 12345, - "foo", - []string{"foo"}, - } { - _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ - UUID: dst.UUID, - Attrs: map[string]interface{}{ - "splices": splices, - }}) - c.Logf("splices %#v\n... got err: %s", splices, err) - c.Check(err, check.ErrorMatches, "invalid type .* for splices parameter") - } - - // Check conflicting splices and manifest_text + // Check conflicting replace_files and manifest_text _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ - UUID: dst.UUID, + UUID: dst.UUID, + ReplaceFiles: map[string]string{"/": ""}, Attrs: map[string]interface{}{ - "splices": map[string]string{"/": ""}, "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:z\n", }}) - c.Logf("splices+manifest_text\n... got err: %s", err) - c.Check(err, check.ErrorMatches, "ambiguous request: both.*splices.*manifest_text.*") + c.Logf("replace_files+manifest_text\n... got err: %s", err) + c.Check(err, check.ErrorMatches, "ambiguous request: both.*replace_files.*manifest_text.*") } // expectFiles checks coll's directory structure against the given diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go index 7409b18132..d76ece1edd 100644 --- a/sdk/go/arvados/api.go +++ b/sdk/go/arvados/api.go @@ -139,6 +139,8 @@ type CreateOptions struct { EnsureUniqueName bool `json:"ensure_unique_name"` Select []string `json:"select"` Attrs map[string]interface{} `json:"attrs"` + // ReplaceFiles only applies when creating a collection. + ReplaceFiles map[string]string `json:"replace_files"` } type UpdateOptions struct { @@ -146,6 +148,8 @@ type UpdateOptions struct { Attrs map[string]interface{} `json:"attrs"` Select []string `json:"select"` BypassFederation bool `json:"bypass_federation"` + // ReplaceFiles only applies when updating a collection. + ReplaceFiles map[string]string `json:"replace_files"` } type GroupContentsOptions struct { diff --git a/services/api/app/controllers/arvados/v1/schema_controller.rb b/services/api/app/controllers/arvados/v1/schema_controller.rb index 59ac639baf..ac2367aa0d 100644 --- a/services/api/app/controllers/arvados/v1/schema_controller.rb +++ b/services/api/app/controllers/arvados/v1/schema_controller.rb @@ -406,6 +406,20 @@ class Arvados::V1::SchemaController < ApplicationController end end + # The 'replace_files' option is implemented in lib/controller, + # not Rails -- we just need to add it here so discovery-aware + # clients know how to validate it. + [:create, :update].each do |action| + discovery[:resources]['collections'][:methods][action][:parameters]['replace_files'] = { + type: 'object', + description: 'Files and directories to initialize/replace with content from other collections.', + required: false, + location: 'query', + properties: {}, + additionalProperties: {type: 'string'}, + } + end + discovery[:resources]['configs'] = { methods: { get: {