From: Tom Clegg Date: Fri, 11 Mar 2022 16:01:30 +0000 (-0500) Subject: 18600: Merge branch 'main' X-Git-Tag: 2.4.0~47^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/f7bf9d69603db2d500563648460e2a96524de266?hp=c56098048193b9ba8597b55c761af07a7617c026 18600: Merge branch 'main' Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid index 01efda2b0c..5ff8d529f8 100644 --- a/doc/api/methods/collections.html.textile.liquid +++ b/doc/api/methods/collections.html.textile.liquid @@ -47,7 +47,7 @@ table(table table-bordered table-condensed). h3. Conditions of creating a Collection -The @portable_data_hash@ and @manifest_text@ attributes must be provided when creating a Collection. The cryptographic digest of the supplied @manifest_text@ must match the supplied @portable_data_hash@. +If a new @portable_data_hash@ is specified when creating or updating a Collection, it must match the cryptographic digest of the supplied @manifest_text@. h3. Side effects of creating a Collection @@ -72,6 +72,9 @@ Arguments: table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location |_. Example | |collection|object||query|| +|replace_files|object|Initialize files and directories using content from other collections|query|| + +The new collection's content can be initialized by providing a @manifest_text@ key in the provided @collection@ object, or by using the @replace_files@ option (see "replace_files":#replace_files below). h3. delete @@ -116,6 +119,9 @@ table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location |_. Example | {background:#ccffcc}.|uuid|string|The UUID of the Collection in question.|path|| |collection|object||query|| +|replace_files|object|Delete and replace files and directories using content from other collections|query|| + +The collection's content can be updated by providing a @manifest_text@ key in the provided @collection@ object, or by using the @replace_files@ option (see "replace_files":#replace_files below). h3. untrash @@ -160,3 +166,56 @@ Arguments: table(table table-bordered table-condensed). |_. Argument |_. Type |_. Description |_. Location |_. Example | {background:#ccffcc}.|uuid|string|The UUID of the Collection to get usage.|path|| + +h2(#replace_files). Using "replace_files" to create/update collections + +The @replace_files@ option can be used with the @create@ and @update@ APIs to efficiently copy individual files and directory trees from other collections, and copy/rename/delete items within an existing collection, without transferring any file data. + +@replace_files@ keys indicate target paths in the new collection, and values specify sources that should be copied to the target paths. +* Each target path must be an absolute canonical path beginning with @/@. It must not contain @.@ or @..@ components, consecutive @/@ characters, or a trailing @/@ after the final component. +* Each source must be either an empty string (signifying that the target path is to be deleted), or @PDH/path@ where @PDH@ is the portable data hash of a collection on the cluster and @/path@ is a file or directory in that collection. +* In an @update@ request, sources may reference the current portable data hash of the collection being updated. + +Example: delete @foo.txt@ from a collection + +
+"replace_files": {
+  "/foo.txt": ""
+}
+
+ +Example: rename @foo.txt@ to @bar.txt@ in a collection with portable data hash @fa7aeb5140e2848d39b416daeef4ffc5+45@ + +
+"replace_files": {
+  "/foo.txt": "",
+  "/bar.txt": "fa7aeb5140e2848d39b416daeef4ffc5+45/foo.txt"
+}
+
+ +Example: delete current contents, then add content from multiple collections + +
+"replace_files": {
+  "/": "",
+  "/copy of collection 1": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/",
+  "/copy of collection 2": "ea10d51bcf88862dbcc36eb292017dfd+45/"
+}
+
+ +Example: replace entire collection with a copy of a subdirectory from another collection + +
+"replace_files": {
+  "/": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/subdir"
+}
+
+ +A target path with a non-empty source cannot be the ancestor of another target path in the same request. For example, the following request is invalid: + +
+"replace_files": {
+  "/foo": "fa7aeb5140e2848d39b416daeef4ffc5+45/",
+  "/foo/this_will_return_an_error": ""
+}
+
diff --git a/lib/controller/localdb/collection.go b/lib/controller/localdb/collection.go index 96c89252ec..868e466e9e 100644 --- a/lib/controller/localdb/collection.go +++ b/lib/controller/localdb/collection.go @@ -6,10 +6,17 @@ package localdb import ( "context" + "fmt" + "net/http" + "os" + "sort" + "strings" "time" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/auth" + "git.arvados.org/arvados.git/sdk/go/httpserver" ) // CollectionGet defers to railsProxy for everything except blob @@ -61,6 +68,9 @@ func (conn *Conn) CollectionCreate(ctx context.Context, opts arvados.CreateOptio // them. opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) } + if opts.Attrs, err = conn.applyReplaceFilesOption(ctx, "", opts.Attrs, opts.ReplaceFiles); err != nil { + return arvados.Collection{}, err + } resp, err := conn.railsProxy.CollectionCreate(ctx, opts) if err != nil { return resp, err @@ -82,6 +92,9 @@ func (conn *Conn) CollectionUpdate(ctx context.Context, opts arvados.UpdateOptio // them. opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...) } + if opts.Attrs, err = conn.applyReplaceFilesOption(ctx, opts.UUID, opts.Attrs, opts.ReplaceFiles); err != nil { + return arvados.Collection{}, err + } resp, err := conn.railsProxy.CollectionUpdate(ctx, opts) if err != nil { return resp, err @@ -108,3 +121,147 @@ func (conn *Conn) signCollection(ctx context.Context, coll *arvados.Collection) } coll.ManifestText = arvados.SignManifest(coll.ManifestText, token, exp, ttl, []byte(conn.cluster.Collections.BlobSigningKey)) } + +// If replaceFiles is non-empty, populate attrs["manifest_text"] by +// starting with the content of fromUUID (or an empty collection if +// fromUUID is empty) and applying the specified file/directory +// replacements. +// +// Return value is the (possibly modified) attrs map. +func (conn *Conn) applyReplaceFilesOption(ctx context.Context, fromUUID string, attrs map[string]interface{}, replaceFiles map[string]string) (map[string]interface{}, error) { + if len(replaceFiles) == 0 { + return attrs, nil + } else if mtxt, ok := attrs["manifest_text"].(string); ok && len(mtxt) > 0 { + return nil, httpserver.Errorf(http.StatusBadRequest, "ambiguous request: both 'replace_files' and attrs['manifest_text'] values provided") + } + + // Load the current collection (if any) and set up an + // in-memory filesystem. + var dst arvados.Collection + if _, replacingRoot := replaceFiles["/"]; !replacingRoot && fromUUID != "" { + src, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: fromUUID}) + if err != nil { + return nil, err + } + dst = src + } + dstfs, err := dst.FileSystem(&arvados.StubClient{}, &arvados.StubClient{}) + if err != nil { + return nil, err + } + + // Sort replacements by source collection to avoid redundant + // reloads when a source collection is used more than + // once. Note empty sources (which mean "delete target path") + // sort first. + dstTodo := make([]string, 0, len(replaceFiles)) + { + srcid := make(map[string]string, len(replaceFiles)) + for dst, src := range replaceFiles { + dstTodo = append(dstTodo, dst) + if i := strings.IndexRune(src, '/'); i > 0 { + srcid[dst] = src[:i] + } + } + sort.Slice(dstTodo, func(i, j int) bool { + return srcid[dstTodo[i]] < srcid[dstTodo[j]] + }) + } + + // Reject attempt to replace a node as well as its descendant + // (e.g., a/ and a/b/), which is unsupported, except where the + // source for a/ is empty (i.e., delete). + for _, dst := range dstTodo { + if dst != "/" && (strings.HasSuffix(dst, "/") || + strings.HasSuffix(dst, "/.") || + strings.HasSuffix(dst, "/..") || + strings.Contains(dst, "//") || + strings.Contains(dst, "/./") || + strings.Contains(dst, "/../") || + !strings.HasPrefix(dst, "/")) { + return nil, httpserver.Errorf(http.StatusBadRequest, "invalid replace_files target: %q", dst) + } + for i := 0; i < len(dst)-1; i++ { + if dst[i] != '/' { + continue + } + outerdst := dst[:i] + if outerdst == "" { + outerdst = "/" + } + if outersrc := replaceFiles[outerdst]; outersrc != "" { + return nil, httpserver.Errorf(http.StatusBadRequest, "replace_files: cannot operate on target %q inside non-empty target %q", dst, outerdst) + } + } + } + + var srcidloaded string + var srcfs arvados.FileSystem + // Apply the requested replacements. + for _, dst := range dstTodo { + src := replaceFiles[dst] + if src == "" { + if dst == "/" { + // In this case we started with a + // blank manifest, so there can't be + // anything to delete. + continue + } + err := dstfs.RemoveAll(dst) + if err != nil { + return nil, fmt.Errorf("RemoveAll(%s): %w", dst, err) + } + continue + } + srcspec := strings.SplitN(src, "/", 2) + srcid, srcpath := srcspec[0], "/" + if !arvadosclient.PDHMatch(srcid) { + return nil, httpserver.Errorf(http.StatusBadRequest, "invalid source %q for replace_files[%q]: must be \"\" or \"PDH\" or \"PDH/path\"", src, dst) + } + if len(srcspec) == 2 && srcspec[1] != "" { + srcpath = srcspec[1] + } + if srcidloaded != srcid { + srcfs = nil + srccoll, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: srcid}) + if err != nil { + return nil, err + } + // We use StubClient here because we don't + // want srcfs to read/write any file data or + // sync collection state to/from the database. + srcfs, err = srccoll.FileSystem(&arvados.StubClient{}, &arvados.StubClient{}) + if err != nil { + return nil, err + } + srcidloaded = srcid + } + snap, err := arvados.Snapshot(srcfs, srcpath) + if err != nil { + return nil, httpserver.Errorf(http.StatusBadRequest, "error getting snapshot of %q from %q: %w", srcpath, srcid, err) + } + // Create intermediate dirs, in case dst is + // "newdir1/newdir2/dst". + for i := 1; i < len(dst)-1; i++ { + if dst[i] == '/' { + err = dstfs.Mkdir(dst[:i], 0777) + if err != nil && !os.IsExist(err) { + return nil, httpserver.Errorf(http.StatusBadRequest, "error creating parent dirs for %q: %w", dst, err) + } + } + } + err = arvados.Splice(dstfs, dst, snap) + if err != nil { + return nil, fmt.Errorf("error splicing snapshot onto path %q: %w", dst, err) + } + } + mtxt, err := dstfs.MarshalManifest(".") + if err != nil { + return nil, err + } + if attrs == nil { + attrs = make(map[string]interface{}, 1) + } + attrs["manifest_text"] = mtxt + return attrs, nil +} diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go index bbfb811165..dac8b769fe 100644 --- a/lib/controller/localdb/collection_test.go +++ b/lib/controller/localdb/collection_test.go @@ -6,16 +6,22 @@ package localdb import ( "context" + "io/fs" + "path/filepath" "regexp" + "sort" "strconv" + "strings" "time" "git.arvados.org/arvados.git/lib/config" "git.arvados.org/arvados.git/lib/controller/rpc" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" "git.arvados.org/arvados.git/sdk/go/arvadostest" "git.arvados.org/arvados.git/sdk/go/auth" "git.arvados.org/arvados.git/sdk/go/ctxlog" + "git.arvados.org/arvados.git/sdk/go/keepclient" check "gopkg.in/check.v1" ) @@ -71,7 +77,7 @@ func (s *CollectionSuite) setUpVocabulary(c *check.C, testVocabulary string) { s.localdb.vocabularyCache = voc } -func (s *CollectionSuite) TestCollectionCreateWithProperties(c *check.C) { +func (s *CollectionSuite) TestCollectionCreateAndUpdateWithProperties(c *check.C) { s.setUpVocabulary(c, "") ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}}) @@ -88,6 +94,7 @@ func (s *CollectionSuite) TestCollectionCreateWithProperties(c *check.C) { for _, tt := range tests { c.Log(c.TestName()+" ", tt.name) + // Create with properties coll, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{ Select: []string{"uuid", "properties"}, Attrs: map[string]interface{}{ @@ -99,26 +106,9 @@ func (s *CollectionSuite) TestCollectionCreateWithProperties(c *check.C) { } else { c.Assert(err, check.NotNil) } - } -} - -func (s *CollectionSuite) TestCollectionUpdateWithProperties(c *check.C) { - s.setUpVocabulary(c, "") - ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}}) - tests := []struct { - name string - props map[string]interface{} - success bool - }{ - {"Invalid prop key", map[string]interface{}{"Priority": "IDVALIMPORTANCES1"}, false}, - {"Invalid prop value", map[string]interface{}{"IDTAGIMPORTANCES": "high"}, false}, - {"Valid prop key & value", map[string]interface{}{"IDTAGIMPORTANCES": "IDVALIMPORTANCES1"}, true}, - {"Empty properties", map[string]interface{}{}, true}, - } - for _, tt := range tests { - c.Log(c.TestName()+" ", tt.name) - coll, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{}) + // Create, then update with properties + coll, err = s.localdb.CollectionCreate(ctx, arvados.CreateOptions{}) c.Assert(err, check.IsNil) coll, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ UUID: coll.UUID, @@ -135,6 +125,180 @@ func (s *CollectionSuite) TestCollectionUpdateWithProperties(c *check.C) { } } +func (s *CollectionSuite) TestCollectionReplaceFiles(c *check.C) { + ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.AdminToken}}) + foo, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{ + Attrs: map[string]interface{}{ + "owner_uuid": arvadostest.ActiveUserUUID, + "manifest_text": ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n", + }}) + c.Assert(err, check.IsNil) + s.localdb.signCollection(ctx, &foo) + foobarbaz, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{ + Attrs: map[string]interface{}{ + "owner_uuid": arvadostest.ActiveUserUUID, + "manifest_text": "./foo/bar 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n", + }}) + c.Assert(err, check.IsNil) + s.localdb.signCollection(ctx, &foobarbaz) + wazqux, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{ + Attrs: map[string]interface{}{ + "owner_uuid": arvadostest.ActiveUserUUID, + "manifest_text": "./waz d85b1213473c2fd7c2045020a6b9c62b+3 0:3:qux.txt\n", + }}) + c.Assert(err, check.IsNil) + s.localdb.signCollection(ctx, &wazqux) + + ctx = auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}}) + + // Create using content from existing collections + dst, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{ + ReplaceFiles: map[string]string{ + "/f": foo.PortableDataHash + "/foo.txt", + "/b": foobarbaz.PortableDataHash + "/foo/bar", + "/q": wazqux.PortableDataHash + "/", + "/w": wazqux.PortableDataHash + "/waz", + }, + Attrs: map[string]interface{}{ + "owner_uuid": arvadostest.ActiveUserUUID, + }}) + c.Assert(err, check.IsNil) + s.expectFiles(c, dst, "f", "b/baz.txt", "q/waz/qux.txt", "w/qux.txt") + + // Delete a file and a directory + dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ + UUID: dst.UUID, + ReplaceFiles: map[string]string{ + "/f": "", + "/q/waz": "", + }}) + c.Assert(err, check.IsNil) + s.expectFiles(c, dst, "b/baz.txt", "q/", "w/qux.txt") + + // Move and copy content within collection + dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ + UUID: dst.UUID, + ReplaceFiles: map[string]string{ + // Note splicing content to /b/corge.txt but + // removing everything else from /b + "/b": "", + "/b/corge.txt": dst.PortableDataHash + "/b/baz.txt", + "/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt", + }}) + c.Assert(err, check.IsNil) + s.expectFiles(c, dst, "b/corge.txt", "q/", "w/qux.txt", "quux/corge.txt") + + // Remove everything except one file + dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ + UUID: dst.UUID, + ReplaceFiles: map[string]string{ + "/": "", + "/b/corge.txt": dst.PortableDataHash + "/b/corge.txt", + }}) + c.Assert(err, check.IsNil) + s.expectFiles(c, dst, "b/corge.txt") + + // Copy entire collection to root + dstcopy, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{ + ReplaceFiles: map[string]string{ + "/": dst.PortableDataHash, + }}) + c.Check(err, check.IsNil) + c.Check(dstcopy.PortableDataHash, check.Equals, dst.PortableDataHash) + s.expectFiles(c, dstcopy, "b/corge.txt") + + // Check invalid targets, sources, and combinations + for _, badrepl := range []map[string]string{ + { + "/foo/nope": dst.PortableDataHash + "/b", + "/foo": dst.PortableDataHash + "/b", + }, + { + "/foo": dst.PortableDataHash + "/b", + "/foo/nope": "", + }, + { + "/": dst.PortableDataHash + "/", + "/nope": "", + }, + { + "/": dst.PortableDataHash + "/", + "/nope": dst.PortableDataHash + "/b", + }, + {"/bad/": ""}, + {"/./bad": ""}, + {"/b/./ad": ""}, + {"/b/../ad": ""}, + {"/b/.": ""}, + {".": ""}, + {"bad": ""}, + {"": ""}, + {"/bad": "/b"}, + {"/bad": "bad/b"}, + {"/bad": dst.UUID + "/b"}, + } { + _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ + UUID: dst.UUID, + ReplaceFiles: badrepl, + }) + c.Logf("badrepl %#v\n... got err: %s", badrepl, err) + c.Check(err, check.NotNil) + } + + // Check conflicting replace_files and manifest_text + _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{ + UUID: dst.UUID, + ReplaceFiles: map[string]string{"/": ""}, + Attrs: map[string]interface{}{ + "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:z\n", + }}) + c.Logf("replace_files+manifest_text\n... got err: %s", err) + c.Check(err, check.ErrorMatches, "ambiguous request: both.*replace_files.*manifest_text.*") +} + +// expectFiles checks coll's directory structure against the given +// list of expected files and empty directories. An expected path with +// a trailing slash indicates an empty directory. +func (s *CollectionSuite) expectFiles(c *check.C, coll arvados.Collection, expected ...string) { + client := arvados.NewClientFromEnv() + ac, err := arvadosclient.New(client) + c.Assert(err, check.IsNil) + kc, err := keepclient.MakeKeepClient(ac) + c.Assert(err, check.IsNil) + cfs, err := coll.FileSystem(arvados.NewClientFromEnv(), kc) + c.Assert(err, check.IsNil) + var found []string + nonemptydirs := map[string]bool{} + fs.WalkDir(arvados.FS(cfs), "/", func(path string, d fs.DirEntry, err error) error { + dir, _ := filepath.Split(path) + nonemptydirs[dir] = true + if d.IsDir() { + if path != "/" { + path += "/" + } + if !nonemptydirs[path] { + nonemptydirs[path] = false + } + } else { + found = append(found, path) + } + return nil + }) + for d, nonempty := range nonemptydirs { + if !nonempty { + found = append(found, d) + } + } + for i, path := range found { + if path != "/" { + found[i] = strings.TrimPrefix(path, "/") + } + } + sort.Strings(found) + sort.Strings(expected) + c.Check(found, check.DeepEquals, expected) +} + func (s *CollectionSuite) TestSignatures(c *check.C) { ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}}) diff --git a/sdk/cli/test/test_arv-collection-create.rb b/sdk/cli/test/test_arv-collection-create.rb index 39c50bcc83..1b5a368b7d 100644 --- a/sdk/cli/test/test_arv-collection-create.rb +++ b/sdk/cli/test/test_arv-collection-create.rb @@ -14,14 +14,48 @@ class TestCollectionCreate < Minitest::Test def test_small_collection uuid = Digest::MD5.hexdigest(foo_manifest) + '+' + foo_manifest.size.to_s + ok = nil out, err = capture_subprocess_io do - assert_arv('--format', 'uuid', 'collection', 'create', '--collection', { - uuid: uuid, - manifest_text: foo_manifest - }.to_json) + ok = arv('--format', 'uuid', 'collection', 'create', '--collection', { + uuid: uuid, + manifest_text: foo_manifest + }.to_json) end - assert(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out)) - assert_equal '', err + assert_equal('', err) + assert_equal(true, ok) + assert_match(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/, out) + end + + def test_collection_replace_files + ok = nil + uuid, err = capture_subprocess_io do + ok = arv('--format', 'uuid', 'collection', 'create', '--collection', '{}') + end + assert_equal('', err) + assert_equal(true, ok) + assert_match(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/, uuid) + uuid = uuid.strip + + out, err = capture_subprocess_io do + ok = arv('--format', 'uuid', + 'collection', 'update', + '--uuid', uuid, + '--collection', '{}', + '--replace-files', { + "/gpl.pdf": "b519d9cb706a29fc7ea24dbea2f05851+93/GNU_General_Public_License,_version_3.pdf", + }.to_json) + end + assert_equal('', err) + assert_equal(true, ok) + assert_equal(uuid, out.strip) + + ok = nil + out, err = capture_subprocess_io do + ok = arv('--format', 'json', 'collection', 'get', '--uuid', uuid) + end + assert_equal('', err) + assert_equal(true, ok) + assert_match(/\. 6a4ff0499484c6c79c95cd8c566bd25f\+249025.* 0:249025:gpl.pdf\\n/, out) end def test_read_resource_object_from_file @@ -29,29 +63,22 @@ class TestCollectionCreate < Minitest::Test begin tempfile.write({manifest_text: foo_manifest}.to_json) tempfile.close + ok = nil out, err = capture_subprocess_io do - assert_arv('--format', 'uuid', - 'collection', 'create', '--collection', tempfile.path) + ok = arv('--format', 'uuid', + 'collection', 'create', '--collection', tempfile.path) end - assert(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out)) - assert_equal '', err + assert_equal('', err) + assert_equal(true, ok) + assert_match(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/, out) ensure tempfile.unlink end end protected - def assert_arv(*args) - expect = case args.first - when true, false - args.shift - else - true - end - assert_equal(expect, - system(['./bin/arv', 'arv'], *args), - "`arv #{args.join ' '}` " + - "should exit #{if expect then 0 else 'non-zero' end}") + def arv(*args) + system(['./bin/arv', 'arv'], *args) end def foo_manifest diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go index 7409b18132..d76ece1edd 100644 --- a/sdk/go/arvados/api.go +++ b/sdk/go/arvados/api.go @@ -139,6 +139,8 @@ type CreateOptions struct { EnsureUniqueName bool `json:"ensure_unique_name"` Select []string `json:"select"` Attrs map[string]interface{} `json:"attrs"` + // ReplaceFiles only applies when creating a collection. + ReplaceFiles map[string]string `json:"replace_files"` } type UpdateOptions struct { @@ -146,6 +148,8 @@ type UpdateOptions struct { Attrs map[string]interface{} `json:"attrs"` Select []string `json:"select"` BypassFederation bool `json:"bypass_federation"` + // ReplaceFiles only applies when updating a collection. + ReplaceFiles map[string]string `json:"replace_files"` } type GroupContentsOptions struct { diff --git a/sdk/go/arvados/fs_backend.go b/sdk/go/arvados/fs_backend.go index 32365a5317..cc4c32ffe9 100644 --- a/sdk/go/arvados/fs_backend.go +++ b/sdk/go/arvados/fs_backend.go @@ -6,6 +6,7 @@ package arvados import ( "context" + "errors" "io" ) @@ -30,3 +31,16 @@ type keepClient interface { type apiClient interface { RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error } + +var errStubClient = errors.New("stub client") + +type StubClient struct{} + +func (*StubClient) ReadAt(string, []byte, int) (int, error) { return 0, errStubClient } +func (*StubClient) LocalLocator(loc string) (string, error) { return loc, nil } +func (*StubClient) BlockWrite(context.Context, BlockWriteOptions) (BlockWriteResponse, error) { + return BlockWriteResponse{}, errStubClient +} +func (*StubClient) RequestAndDecode(_ interface{}, _, _ string, _ io.Reader, _ interface{}) error { + return errStubClient +} diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index 80b8037293..bebb74261e 100644 --- a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -8,6 +8,7 @@ import ( "errors" "fmt" "io" + "io/fs" "log" "net/http" "os" @@ -159,6 +160,18 @@ type FileSystem interface { MemorySize() int64 } +type fsFS struct { + FileSystem +} + +// FS returns an fs.FS interface to the given FileSystem, to enable +// the use of fs.WalkDir, etc. +func FS(fs FileSystem) fs.FS { return fsFS{fs} } +func (fs fsFS) Open(path string) (fs.File, error) { + f, err := fs.FileSystem.Open(path) + return f, err +} + type inode interface { SetParent(parent inode, name string) Parent() inode @@ -450,14 +463,14 @@ func (fs *fileSystem) openFile(name string, flag int, perm os.FileMode) (*fileha default: return nil, fmt.Errorf("invalid flags 0x%x", flag) } - if !writable && parent.IsDir() { + if parent.IsDir() { // A directory can be opened via "foo/", "foo/.", or // "foo/..". switch name { case ".", "": - return &filehandle{inode: parent}, nil + return &filehandle{inode: parent, readable: readable, writable: writable}, nil case "..": - return &filehandle{inode: parent.Parent()}, nil + return &filehandle{inode: parent.Parent(), readable: readable, writable: writable}, nil } } createMode := flag&os.O_CREATE != 0 @@ -753,7 +766,7 @@ func Splice(fs FileSystem, target string, newsubtree *Subtree) error { f, err = fs.OpenFile(target, os.O_CREATE|os.O_WRONLY, 0700) } if err != nil { - return err + return fmt.Errorf("open %s: %w", target, err) } defer f.Close() return f.Splice(newsubtree) diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index 0c5819721e..f4dae746e2 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -1565,7 +1565,7 @@ func (dn *dirnode) snapshot() (*dirnode, error) { func (dn *dirnode) Splice(repl inode) error { repl, err := repl.Snapshot() if err != nil { - return err + return fmt.Errorf("cannot copy snapshot: %w", err) } switch repl := repl.(type) { default: @@ -1599,7 +1599,7 @@ func (dn *dirnode) Splice(repl inode) error { defer dn.Unlock() _, err = dn.parent.Child(dn.fileinfo.name, func(inode) (inode, error) { return repl, nil }) if err != nil { - return err + return fmt.Errorf("error replacing filenode: dn.parent.Child(): %w", err) } repl.fs = dn.fs } diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go index fab91d1f77..b221aaa083 100644 --- a/sdk/go/arvados/fs_collection_test.go +++ b/sdk/go/arvados/fs_collection_test.go @@ -1441,6 +1441,30 @@ func (s *CollectionFSSuite) TestEdgeCaseManifests(c *check.C) { } } +func (s *CollectionFSSuite) TestSnapshotSplice(c *check.C) { + filedata1 := "hello snapshot+splice world\n" + fs, err := (&Collection{}).FileSystem(s.client, s.kc) + c.Assert(err, check.IsNil) + { + f, err := fs.OpenFile("file1", os.O_CREATE|os.O_RDWR, 0700) + c.Assert(err, check.IsNil) + _, err = f.Write([]byte(filedata1)) + c.Assert(err, check.IsNil) + err = f.Close() + c.Assert(err, check.IsNil) + } + + snap, err := Snapshot(fs, "/") + c.Assert(err, check.IsNil) + err = Splice(fs, "dir1", snap) + c.Assert(err, check.IsNil) + f, err := fs.Open("dir1/file1") + c.Assert(err, check.IsNil) + buf, err := io.ReadAll(f) + c.Assert(err, check.IsNil) + c.Check(string(buf), check.Equals, filedata1) +} + func (s *CollectionFSSuite) TestRefreshSignatures(c *check.C) { filedata1 := "hello refresh signatures world\n" fs, err := (&Collection{}).FileSystem(s.client, s.kc) diff --git a/sdk/go/arvados/fs_filehandle.go b/sdk/go/arvados/fs_filehandle.go index 4530a7b06a..f50dd4612b 100644 --- a/sdk/go/arvados/fs_filehandle.go +++ b/sdk/go/arvados/fs_filehandle.go @@ -6,6 +6,7 @@ package arvados import ( "io" + "io/fs" "os" ) @@ -73,6 +74,31 @@ func (f *filehandle) Write(p []byte) (n int, err error) { return } +// dirEntry implements fs.DirEntry, see (*filehandle)ReadDir(). +type dirEntry struct { + os.FileInfo +} + +func (ent dirEntry) Type() fs.FileMode { + return ent.Mode().Type() +} +func (ent dirEntry) Info() (fs.FileInfo, error) { + return ent, nil +} + +// ReadDir implements fs.ReadDirFile. +func (f *filehandle) ReadDir(count int) ([]fs.DirEntry, error) { + fis, err := f.Readdir(count) + if len(fis) == 0 { + return nil, err + } + ents := make([]fs.DirEntry, len(fis)) + for i, fi := range fis { + ents[i] = dirEntry{fi} + } + return ents, err +} + func (f *filehandle) Readdir(count int) ([]os.FileInfo, error) { if !f.inode.IsDir() { return nil, ErrInvalidOperation diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go index 59fa5fc176..bf24efa7ed 100644 --- a/sdk/go/arvados/fs_site_test.go +++ b/sdk/go/arvados/fs_site_test.go @@ -10,6 +10,7 @@ import ( "io/ioutil" "net/http" "os" + "strings" "syscall" "time" @@ -291,40 +292,41 @@ func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) { c.Check(string(buf), check.Equals, string(thisfile)) } - // Cannot splice a file onto a collection root, or anywhere - // outside a collection + // Cannot splice a file onto a collection root; cannot splice + // anything to a target outside a collection. for _, badpath := range []string{ + dstPath + "/", dstPath, + "/home/A Project/newnodename/", "/home/A Project/newnodename", + "/home/A Project/", "/home/A Project", + "/home/newnodename/", "/home/newnodename", + "/home/", "/home", + "/newnodename/", "/newnodename", + "/", } { err = Splice(s.fs, badpath, snapFile) c.Check(err, check.NotNil) - c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %s")) - if badpath == dstPath { - c.Check(err, check.ErrorMatches, `cannot use Splice to attach a file at top level of \*arvados.collectionFileSystem: invalid operation`, check.Commentf("badpath: %s", badpath)) + if strings.Contains(badpath, "newnodename") && strings.HasSuffix(badpath, "/") { + c.Check(err, ErrorIs, os.ErrNotExist, check.Commentf("badpath %q", badpath)) + } else { + c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %q", badpath)) + } + if strings.TrimSuffix(badpath, "/") == dstPath { + c.Check(err, check.ErrorMatches, `cannot use Splice to attach a file at top level of \*arvados.collectionFileSystem: invalid operation`, check.Commentf("badpath: %q", badpath)) continue } - err = Splice(s.fs, badpath, snap1) - c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %s")) - } - // Destination cannot have trailing slash - for _, badpath := range []string{ - dstPath + "/ctxlog/", - dstPath + "/", - "/home/A Project/", - "/home/", - "/", - "", - } { err = Splice(s.fs, badpath, snap1) - c.Check(err, ErrorIs, ErrInvalidArgument, check.Commentf("badpath %s", badpath)) - err = Splice(s.fs, badpath, snapFile) - c.Check(err, ErrorIs, ErrInvalidArgument, check.Commentf("badpath %s", badpath)) + if strings.Contains(badpath, "newnodename") && strings.HasSuffix(badpath, "/") { + c.Check(err, ErrorIs, os.ErrNotExist, check.Commentf("badpath %q", badpath)) + } else { + c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %q", badpath)) + } } // Destination's parent must already exist @@ -340,9 +342,10 @@ func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) { } snap2, err := Snapshot(s.fs, dstPath+"/ctxlog-copy") - c.Check(err, check.IsNil) - err = Splice(s.fs, dstPath+"/ctxlog-copy-copy", snap2) - c.Check(err, check.IsNil) + if c.Check(err, check.IsNil) { + err = Splice(s.fs, dstPath+"/ctxlog-copy-copy", snap2) + c.Check(err, check.IsNil) + } // Snapshot entire collection, splice into same collection at // a new path, remove file from original location, verify @@ -362,9 +365,10 @@ func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) { _, err = s.fs.Open(dstPath + "/arvados/fs_site_test.go") c.Check(err, check.Equals, os.ErrNotExist) f, err = s.fs.Open(dstPath + "/copy2/arvados/fs_site_test.go") - c.Check(err, check.IsNil) - defer f.Close() - buf, err := ioutil.ReadAll(f) - c.Check(err, check.IsNil) - c.Check(string(buf), check.Equals, string(thisfile)) + if c.Check(err, check.IsNil) { + defer f.Close() + buf, err := ioutil.ReadAll(f) + c.Check(err, check.IsNil) + c.Check(string(buf), check.Equals, string(thisfile)) + } } diff --git a/sdk/go/httpserver/error.go b/sdk/go/httpserver/error.go index f1817d3374..75ff85336f 100644 --- a/sdk/go/httpserver/error.go +++ b/sdk/go/httpserver/error.go @@ -6,9 +6,14 @@ package httpserver import ( "encoding/json" + "fmt" "net/http" ) +func Errorf(status int, tmpl string, args ...interface{}) error { + return errorWithStatus{fmt.Errorf(tmpl, args...), status} +} + func ErrorWithStatus(err error, status int) error { return errorWithStatus{err, status} } diff --git a/services/api/app/controllers/arvados/v1/schema_controller.rb b/services/api/app/controllers/arvados/v1/schema_controller.rb index 100b916817..5508ac0fbd 100644 --- a/services/api/app/controllers/arvados/v1/schema_controller.rb +++ b/services/api/app/controllers/arvados/v1/schema_controller.rb @@ -406,6 +406,20 @@ class Arvados::V1::SchemaController < ApplicationController end end + # The 'replace_files' option is implemented in lib/controller, + # not Rails -- we just need to add it here so discovery-aware + # clients know how to validate it. + [:create, :update].each do |action| + discovery[:resources]['collections'][:methods][action][:parameters]['replace_files'] = { + type: 'object', + description: 'Files and directories to initialize/replace with content from other collections.', + required: false, + location: 'query', + properties: {}, + additionalProperties: {type: 'string'}, + } + end + discovery[:resources]['configs'] = { methods: { get: {