[ARVADOS] created: 2.1.0-2014-g478a37229
Git user
git at public.arvados.org
Thu Mar 10 21:31:47 UTC 2022
at 478a372296f2ede0095827bb58508f0772c63ae2 (commit)
commit 478a372296f2ede0095827bb58508f0772c63ae2
Author: Tom Clegg <tom at curii.com>
Date: Thu Mar 10 16:30:43 2022 -0500
18600: Test replace_files feature via sdk/cli.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/cli/test/test_arv-collection-create.rb b/sdk/cli/test/test_arv-collection-create.rb
index 39c50bcc8..1b5a368b7 100644
--- a/sdk/cli/test/test_arv-collection-create.rb
+++ b/sdk/cli/test/test_arv-collection-create.rb
@@ -14,14 +14,48 @@ class TestCollectionCreate < Minitest::Test
def test_small_collection
uuid = Digest::MD5.hexdigest(foo_manifest) + '+' + foo_manifest.size.to_s
+ ok = nil
out, err = capture_subprocess_io do
- assert_arv('--format', 'uuid', 'collection', 'create', '--collection', {
- uuid: uuid,
- manifest_text: foo_manifest
- }.to_json)
+ ok = arv('--format', 'uuid', 'collection', 'create', '--collection', {
+ uuid: uuid,
+ manifest_text: foo_manifest
+ }.to_json)
end
- assert(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out))
- assert_equal '', err
+ assert_equal('', err)
+ assert_equal(true, ok)
+ assert_match(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/, out)
+ end
+
+ def test_collection_replace_files
+ ok = nil
+ uuid, err = capture_subprocess_io do
+ ok = arv('--format', 'uuid', 'collection', 'create', '--collection', '{}')
+ end
+ assert_equal('', err)
+ assert_equal(true, ok)
+ assert_match(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/, uuid)
+ uuid = uuid.strip
+
+ out, err = capture_subprocess_io do
+ ok = arv('--format', 'uuid',
+ 'collection', 'update',
+ '--uuid', uuid,
+ '--collection', '{}',
+ '--replace-files', {
+ "/gpl.pdf": "b519d9cb706a29fc7ea24dbea2f05851+93/GNU_General_Public_License,_version_3.pdf",
+ }.to_json)
+ end
+ assert_equal('', err)
+ assert_equal(true, ok)
+ assert_equal(uuid, out.strip)
+
+ ok = nil
+ out, err = capture_subprocess_io do
+ ok = arv('--format', 'json', 'collection', 'get', '--uuid', uuid)
+ end
+ assert_equal('', err)
+ assert_equal(true, ok)
+ assert_match(/\. 6a4ff0499484c6c79c95cd8c566bd25f\+249025.* 0:249025:gpl.pdf\\n/, out)
end
def test_read_resource_object_from_file
@@ -29,29 +63,22 @@ class TestCollectionCreate < Minitest::Test
begin
tempfile.write({manifest_text: foo_manifest}.to_json)
tempfile.close
+ ok = nil
out, err = capture_subprocess_io do
- assert_arv('--format', 'uuid',
- 'collection', 'create', '--collection', tempfile.path)
+ ok = arv('--format', 'uuid',
+ 'collection', 'create', '--collection', tempfile.path)
end
- assert(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/.match(out))
- assert_equal '', err
+ assert_equal('', err)
+ assert_equal(true, ok)
+ assert_match(/^([0-9a-z]{5}-4zz18-[0-9a-z]{15})?$/, out)
ensure
tempfile.unlink
end
end
protected
- def assert_arv(*args)
- expect = case args.first
- when true, false
- args.shift
- else
- true
- end
- assert_equal(expect,
- system(['./bin/arv', 'arv'], *args),
- "`arv #{args.join ' '}` " +
- "should exit #{if expect then 0 else 'non-zero' end}")
+ def arv(*args)
+ system(['./bin/arv', 'arv'], *args)
end
def foo_manifest
commit 55723c549ef42d76584a9791036a12c68ffb95eb
Author: Tom Clegg <tom at curii.com>
Date: Thu Mar 10 16:30:27 2022 -0500
18600: Move "splices" to "update_files" update/create option.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid
index bea19bb75..ea67937e8 100644
--- a/doc/api/methods/collections.html.textile.liquid
+++ b/doc/api/methods/collections.html.textile.liquid
@@ -71,9 +71,10 @@ Arguments:
table(table table-bordered table-condensed).
|_. Argument |_. Type |_. Description |_. Location |_. Example |
-|collection|object||query||
+|collection|object||body||
+|replace_files|object|Initialize files and directories using content from other collections|body||
-The new collection's content can be initialized by providing a @manifest_text@ or @splices@ key in the provided @collection@ object (see "splices":#splices below).
+The new collection's content can be initialized by providing a @manifest_text@ key in the provided @collection@ object, or by using the @replace_files@ option (see "replace_files":#replace_files below).
h3. delete
@@ -117,9 +118,10 @@ Arguments:
table(table table-bordered table-condensed).
|_. Argument |_. Type |_. Description |_. Location |_. Example |
{background:#ccffcc}.|uuid|string|The UUID of the Collection in question.|path||
-|collection|object||query||
+|collection|object||body||
+|replace_files|object|Delete and replace files and directories using content from other collections|body||
-The collection's content can be updated by providing a @manifest_text@ or @splices@ key in the provided @collection@ object (see "splices":#splices below).
+The collection's content can be updated by providing a @manifest_text@ key in the provided @collection@ object, or by using the @replace_files@ option (see "replace_files":#replace_files below).
h3. untrash
@@ -165,11 +167,11 @@ table(table table-bordered table-condensed).
|_. Argument |_. Type |_. Description |_. Location |_. Example |
{background:#ccffcc}.|uuid|string|The UUID of the Collection to get usage.|path||
-h2(#splices). Using "splices" to create/update collections
+h2(#replace_files). Using "replace_files" to create/update collections
-The @splices@ attribute can be used with the @create@ and @update@ APIs to efficiently copy individual files and directory trees from other collections, and copy/rename/delete items within an existing collection, without transferring any file data.
+The @replace_files@ option can be used with the @create@ and @update@ APIs to efficiently copy individual files and directory trees from other collections, and copy/rename/delete items within an existing collection, without transferring any file data.
- at splices@ keys indicate target paths in the new collection, and values specify sources that should be copied to the target paths.
+ at replace_files@ keys indicate target paths in the new collection, and values specify sources that should be copied to the target paths.
* Each target path must be an absolute canonical path beginning with @/@. It must not contain @.@ or @..@ components, consecutive @/@ characters, or a trailing @/@ after the final component.
* Each source must be either an empty string (signifying that the target path is to be deleted), or @PDH/path@ where @PDH@ is the portable data hash of a collection on the cluster and @/path@ is a file or directory in that collection.
* In an @update@ request, sources may reference the current portable data hash of the collection being updated.
@@ -177,53 +179,43 @@ The @splices@ attribute can be used with the @create@ and @update@ APIs to effic
Example: delete @foo.txt@ from a collection
<notextile><pre>
-"collection": {
- "splices": {
- "/foo.txt": ""
- }
+"replace_files": {
+ "/foo.txt": ""
}
</pre></notextile>
Example: rename @foo.txt@ to @bar.txt@ in a collection with portable data hash @fa7aeb5140e2848d39b416daeef4ffc5+45@
<notextile><pre>
-"collection": {
- "splices": {
- "/foo.txt": "",
- "/bar.txt": "fa7aeb5140e2848d39b416daeef4ffc5+45/foo.txt"
- }
+"replace_files": {
+ "/foo.txt": "",
+ "/bar.txt": "fa7aeb5140e2848d39b416daeef4ffc5+45/foo.txt"
}
</pre></notextile>
Example: delete current contents, then add content from multiple collections
<notextile><pre>
-"collection": {
- "splices": {
- "/": "",
- "/copy of collection 1": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/",
- "/copy of collection 2": "ea10d51bcf88862dbcc36eb292017dfd+45/"
- }
+"replace_files": {
+ "/": "",
+ "/copy of collection 1": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/",
+ "/copy of collection 2": "ea10d51bcf88862dbcc36eb292017dfd+45/"
}
</pre></notextile>
Example: replace entire collection with a copy of a subdirectory from another collection
<notextile><pre>
-"collection": {
- "splices": {
- "/": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/subdir"
- }
+"replace_files": {
+ "/": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/subdir"
}
</pre></notextile>
A target path with a non-empty source cannot be the ancestor of another target path in the same request. For example, the following request is invalid:
<notextile><pre>
-"collection": {
- "splices": {
- "/foo": "fa7aeb5140e2848d39b416daeef4ffc5+45/",
- "/foo/this_will_return_an_error": ""
- }
+"replace_files": {
+ "/foo": "fa7aeb5140e2848d39b416daeef4ffc5+45/",
+ "/foo/this_will_return_an_error": ""
}
</pre></notextile>
diff --git a/lib/controller/localdb/collection.go b/lib/controller/localdb/collection.go
index 2283b2fcb..868e466e9 100644
--- a/lib/controller/localdb/collection.go
+++ b/lib/controller/localdb/collection.go
@@ -68,7 +68,7 @@ func (conn *Conn) CollectionCreate(ctx context.Context, opts arvados.CreateOptio
// them.
opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
}
- if err := conn.applySplices(ctx, "", opts.Attrs); err != nil {
+ if opts.Attrs, err = conn.applyReplaceFilesOption(ctx, "", opts.Attrs, opts.ReplaceFiles); err != nil {
return arvados.Collection{}, err
}
resp, err := conn.railsProxy.CollectionCreate(ctx, opts)
@@ -92,7 +92,7 @@ func (conn *Conn) CollectionUpdate(ctx context.Context, opts arvados.UpdateOptio
// them.
opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
}
- if err := conn.applySplices(ctx, opts.UUID, opts.Attrs); err != nil {
+ if opts.Attrs, err = conn.applyReplaceFilesOption(ctx, opts.UUID, opts.Attrs, opts.ReplaceFiles); err != nil {
return arvados.Collection{}, err
}
resp, err := conn.railsProxy.CollectionUpdate(ctx, opts)
@@ -122,64 +122,42 @@ func (conn *Conn) signCollection(ctx context.Context, coll *arvados.Collection)
coll.ManifestText = arvados.SignManifest(coll.ManifestText, token, exp, ttl, []byte(conn.cluster.Collections.BlobSigningKey))
}
-// If attrs["splices"] is present, populate attrs["manifest_text"] by
+// If replaceFiles is non-empty, populate attrs["manifest_text"] by
// starting with the content of fromUUID (or an empty collection if
-// fromUUID is empty) and applying the specified splice operations.
-func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[string]interface{}) error {
- var splices map[string]string
-
- // Validate the incoming attrs, and return early if the
- // request doesn't ask for any splices.
- if sp, ok := attrs["splices"]; !ok {
- return nil
- } else {
- switch sp := sp.(type) {
- default:
- return httpserver.Errorf(http.StatusBadRequest, "invalid type %T for splices parameter", sp)
- case nil:
- return nil
- case map[string]string:
- splices = sp
- case map[string]interface{}:
- splices = make(map[string]string, len(sp))
- for dst, src := range sp {
- if src, ok := src.(string); ok {
- splices[dst] = src
- } else {
- return httpserver.Errorf(http.StatusBadRequest, "invalid source type for splice target %q: %v", dst, src)
- }
- }
- }
- if len(splices) == 0 {
- return nil
- } else if mtxt, ok := attrs["manifest_text"].(string); ok && len(mtxt) > 0 {
- return httpserver.Errorf(http.StatusBadRequest, "ambiguous request: both 'splices' and 'manifest_text' values provided")
- }
+// fromUUID is empty) and applying the specified file/directory
+// replacements.
+//
+// Return value is the (possibly modified) attrs map.
+func (conn *Conn) applyReplaceFilesOption(ctx context.Context, fromUUID string, attrs map[string]interface{}, replaceFiles map[string]string) (map[string]interface{}, error) {
+ if len(replaceFiles) == 0 {
+ return attrs, nil
+ } else if mtxt, ok := attrs["manifest_text"].(string); ok && len(mtxt) > 0 {
+ return nil, httpserver.Errorf(http.StatusBadRequest, "ambiguous request: both 'replace_files' and attrs['manifest_text'] values provided")
}
// Load the current collection (if any) and set up an
// in-memory filesystem.
var dst arvados.Collection
- if _, rootsplice := splices["/"]; !rootsplice && fromUUID != "" {
+ if _, replacingRoot := replaceFiles["/"]; !replacingRoot && fromUUID != "" {
src, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: fromUUID})
if err != nil {
- return err
+ return nil, err
}
dst = src
}
dstfs, err := dst.FileSystem(&arvados.StubClient{}, &arvados.StubClient{})
if err != nil {
- return err
+ return nil, err
}
- // Sort splices by source collection to avoid redundant
+ // Sort replacements by source collection to avoid redundant
// reloads when a source collection is used more than
// once. Note empty sources (which mean "delete target path")
// sort first.
- dstTodo := make([]string, 0, len(splices))
+ dstTodo := make([]string, 0, len(replaceFiles))
{
- srcid := make(map[string]string, len(splices))
- for dst, src := range splices {
+ srcid := make(map[string]string, len(replaceFiles))
+ for dst, src := range replaceFiles {
dstTodo = append(dstTodo, dst)
if i := strings.IndexRune(src, '/'); i > 0 {
srcid[dst] = src[:i]
@@ -190,7 +168,7 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
})
}
- // Reject attempt to splice a node as well as its descendant
+ // Reject attempt to replace a node as well as its descendant
// (e.g., a/ and a/b/), which is unsupported, except where the
// source for a/ is empty (i.e., delete).
for _, dst := range dstTodo {
@@ -201,7 +179,7 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
strings.Contains(dst, "/./") ||
strings.Contains(dst, "/../") ||
!strings.HasPrefix(dst, "/")) {
- return httpserver.Errorf(http.StatusBadRequest, "invalid splice target: %q", dst)
+ return nil, httpserver.Errorf(http.StatusBadRequest, "invalid replace_files target: %q", dst)
}
for i := 0; i < len(dst)-1; i++ {
if dst[i] != '/' {
@@ -211,17 +189,17 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
if outerdst == "" {
outerdst = "/"
}
- if outersrc := splices[outerdst]; outersrc != "" {
- return httpserver.Errorf(http.StatusBadRequest, "cannot splice at target %q with non-empty splice at %q", dst, outerdst)
+ if outersrc := replaceFiles[outerdst]; outersrc != "" {
+ return nil, httpserver.Errorf(http.StatusBadRequest, "replace_files: cannot operate on target %q inside non-empty target %q", dst, outerdst)
}
}
}
var srcidloaded string
var srcfs arvados.FileSystem
- // Apply the requested splices.
+ // Apply the requested replacements.
for _, dst := range dstTodo {
- src := splices[dst]
+ src := replaceFiles[dst]
if src == "" {
if dst == "/" {
// In this case we started with a
@@ -231,14 +209,14 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
}
err := dstfs.RemoveAll(dst)
if err != nil {
- return fmt.Errorf("RemoveAll(%s): %w", dst, err)
+ return nil, fmt.Errorf("RemoveAll(%s): %w", dst, err)
}
continue
}
srcspec := strings.SplitN(src, "/", 2)
srcid, srcpath := srcspec[0], "/"
if !arvadosclient.PDHMatch(srcid) {
- return httpserver.Errorf(http.StatusBadRequest, "invalid source %q for splices[%q]: must be \"\" or \"PDH\" or \"PDH/path\"", src, dst)
+ return nil, httpserver.Errorf(http.StatusBadRequest, "invalid source %q for replace_files[%q]: must be \"\" or \"PDH\" or \"PDH/path\"", src, dst)
}
if len(srcspec) == 2 && srcspec[1] != "" {
srcpath = srcspec[1]
@@ -247,20 +225,20 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
srcfs = nil
srccoll, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: srcid})
if err != nil {
- return err
+ return nil, err
}
// We use StubClient here because we don't
// want srcfs to read/write any file data or
// sync collection state to/from the database.
srcfs, err = srccoll.FileSystem(&arvados.StubClient{}, &arvados.StubClient{})
if err != nil {
- return err
+ return nil, err
}
srcidloaded = srcid
}
snap, err := arvados.Snapshot(srcfs, srcpath)
if err != nil {
- return httpserver.Errorf(http.StatusBadRequest, "error getting snapshot of %q from %q: %w", srcpath, srcid, err)
+ return nil, httpserver.Errorf(http.StatusBadRequest, "error getting snapshot of %q from %q: %w", srcpath, srcid, err)
}
// Create intermediate dirs, in case dst is
// "newdir1/newdir2/dst".
@@ -268,20 +246,22 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
if dst[i] == '/' {
err = dstfs.Mkdir(dst[:i], 0777)
if err != nil && !os.IsExist(err) {
- return httpserver.Errorf(http.StatusBadRequest, "error creating parent dirs for %q: %w", dst, err)
+ return nil, httpserver.Errorf(http.StatusBadRequest, "error creating parent dirs for %q: %w", dst, err)
}
}
}
err = arvados.Splice(dstfs, dst, snap)
if err != nil {
- return fmt.Errorf("error splicing snapshot onto path %q: %w", dst, err)
+ return nil, fmt.Errorf("error splicing snapshot onto path %q: %w", dst, err)
}
}
mtxt, err := dstfs.MarshalManifest(".")
if err != nil {
- return err
+ return nil, err
+ }
+ if attrs == nil {
+ attrs = make(map[string]interface{}, 1)
}
- delete(attrs, "splices")
attrs["manifest_text"] = mtxt
- return nil
+ return attrs, nil
}
diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go
index e71ae35c2..dac8b769f 100644
--- a/lib/controller/localdb/collection_test.go
+++ b/lib/controller/localdb/collection_test.go
@@ -125,7 +125,7 @@ func (s *CollectionSuite) TestCollectionCreateAndUpdateWithProperties(c *check.C
}
}
-func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
+func (s *CollectionSuite) TestCollectionReplaceFiles(c *check.C) {
ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.AdminToken}})
foo, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{
Attrs: map[string]interface{}{
@@ -153,14 +153,14 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
// Create using content from existing collections
dst, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{
+ ReplaceFiles: map[string]string{
+ "/f": foo.PortableDataHash + "/foo.txt",
+ "/b": foobarbaz.PortableDataHash + "/foo/bar",
+ "/q": wazqux.PortableDataHash + "/",
+ "/w": wazqux.PortableDataHash + "/waz",
+ },
Attrs: map[string]interface{}{
"owner_uuid": arvadostest.ActiveUserUUID,
- "splices": map[string]string{
- "/f": foo.PortableDataHash + "/foo.txt",
- "/b": foobarbaz.PortableDataHash + "/foo/bar",
- "/q": wazqux.PortableDataHash + "/",
- "/w": wazqux.PortableDataHash + "/waz",
- },
}})
c.Assert(err, check.IsNil)
s.expectFiles(c, dst, "f", "b/baz.txt", "q/waz/qux.txt", "w/qux.txt")
@@ -168,11 +168,9 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
// Delete a file and a directory
dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
UUID: dst.UUID,
- Attrs: map[string]interface{}{
- "splices": map[string]string{
- "/f": "",
- "/q/waz": "",
- },
+ ReplaceFiles: map[string]string{
+ "/f": "",
+ "/q/waz": "",
}})
c.Assert(err, check.IsNil)
s.expectFiles(c, dst, "b/baz.txt", "q/", "w/qux.txt")
@@ -180,15 +178,12 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
// Move and copy content within collection
dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
UUID: dst.UUID,
- Attrs: map[string]interface{}{
- "splices": map[string]string{
- // Note splicing content to
- // /b/corge.txt but removing
- // everything else from /b
- "/b": "",
- "/b/corge.txt": dst.PortableDataHash + "/b/baz.txt",
- "/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt",
- },
+ ReplaceFiles: map[string]string{
+ // Note splicing content to /b/corge.txt but
+ // removing everything else from /b
+ "/b": "",
+ "/b/corge.txt": dst.PortableDataHash + "/b/baz.txt",
+ "/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt",
}})
c.Assert(err, check.IsNil)
s.expectFiles(c, dst, "b/corge.txt", "q/", "w/qux.txt", "quux/corge.txt")
@@ -196,31 +191,24 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
// Remove everything except one file
dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
UUID: dst.UUID,
- Attrs: map[string]interface{}{
- "splices": map[string]string{
- "/": "",
- "/b/corge.txt": dst.PortableDataHash + "/b/corge.txt",
- },
+ ReplaceFiles: map[string]string{
+ "/": "",
+ "/b/corge.txt": dst.PortableDataHash + "/b/corge.txt",
}})
c.Assert(err, check.IsNil)
s.expectFiles(c, dst, "b/corge.txt")
// Copy entire collection to root
dstcopy, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{
- Attrs: map[string]interface{}{
- // Note map[string]interface{} here, which is
- // how lib/controller/router requests will
- // look.
- "splices": map[string]interface{}{
- "/": dst.PortableDataHash,
- },
+ ReplaceFiles: map[string]string{
+ "/": dst.PortableDataHash,
}})
c.Check(err, check.IsNil)
c.Check(dstcopy.PortableDataHash, check.Equals, dst.PortableDataHash)
s.expectFiles(c, dstcopy, "b/corge.txt")
// Check invalid targets, sources, and combinations
- for _, splices := range []map[string]string{
+ for _, badrepl := range []map[string]string{
{
"/foo/nope": dst.PortableDataHash + "/b",
"/foo": dst.PortableDataHash + "/b",
@@ -250,40 +238,22 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
{"/bad": dst.UUID + "/b"},
} {
_, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
- UUID: dst.UUID,
- Attrs: map[string]interface{}{
- "splices": splices,
- }})
- c.Logf("splices %#v\n... got err: %s", splices, err)
+ UUID: dst.UUID,
+ ReplaceFiles: badrepl,
+ })
+ c.Logf("badrepl %#v\n... got err: %s", badrepl, err)
c.Check(err, check.NotNil)
}
- // Check "splices" value that isn't even the right type
- for _, splices := range []interface{}{
- map[string]int{"foo": 1},
- map[int]string{1: "foo"},
- 12345,
- "foo",
- []string{"foo"},
- } {
- _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
- UUID: dst.UUID,
- Attrs: map[string]interface{}{
- "splices": splices,
- }})
- c.Logf("splices %#v\n... got err: %s", splices, err)
- c.Check(err, check.ErrorMatches, "invalid type .* for splices parameter")
- }
-
- // Check conflicting splices and manifest_text
+ // Check conflicting replace_files and manifest_text
_, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
- UUID: dst.UUID,
+ UUID: dst.UUID,
+ ReplaceFiles: map[string]string{"/": ""},
Attrs: map[string]interface{}{
- "splices": map[string]string{"/": ""},
"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:z\n",
}})
- c.Logf("splices+manifest_text\n... got err: %s", err)
- c.Check(err, check.ErrorMatches, "ambiguous request: both.*splices.*manifest_text.*")
+ c.Logf("replace_files+manifest_text\n... got err: %s", err)
+ c.Check(err, check.ErrorMatches, "ambiguous request: both.*replace_files.*manifest_text.*")
}
// expectFiles checks coll's directory structure against the given
diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index 7409b1813..d76ece1ed 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -139,6 +139,8 @@ type CreateOptions struct {
EnsureUniqueName bool `json:"ensure_unique_name"`
Select []string `json:"select"`
Attrs map[string]interface{} `json:"attrs"`
+ // ReplaceFiles only applies when creating a collection.
+ ReplaceFiles map[string]string `json:"replace_files"`
}
type UpdateOptions struct {
@@ -146,6 +148,8 @@ type UpdateOptions struct {
Attrs map[string]interface{} `json:"attrs"`
Select []string `json:"select"`
BypassFederation bool `json:"bypass_federation"`
+ // ReplaceFiles only applies when updating a collection.
+ ReplaceFiles map[string]string `json:"replace_files"`
}
type GroupContentsOptions struct {
diff --git a/services/api/app/controllers/arvados/v1/schema_controller.rb b/services/api/app/controllers/arvados/v1/schema_controller.rb
index 59ac639ba..ac2367aa0 100644
--- a/services/api/app/controllers/arvados/v1/schema_controller.rb
+++ b/services/api/app/controllers/arvados/v1/schema_controller.rb
@@ -406,6 +406,20 @@ class Arvados::V1::SchemaController < ApplicationController
end
end
+ # The 'replace_files' option is implemented in lib/controller,
+ # not Rails -- we just need to add it here so discovery-aware
+ # clients know how to validate it.
+ [:create, :update].each do |action|
+ discovery[:resources]['collections'][:methods][action][:parameters]['replace_files'] = {
+ type: 'object',
+ description: 'Files and directories to initialize/replace with content from other collections.',
+ required: false,
+ location: 'query',
+ properties: {},
+ additionalProperties: {type: 'string'},
+ }
+ end
+
discovery[:resources]['configs'] = {
methods: {
get: {
commit f00d666fdf3f4322a6e153a9dc3a25069859e52d
Author: Tom Clegg <tom at curii.com>
Date: Mon Mar 7 15:15:57 2022 -0500
18600: Update error expectations in Snapshot/Splice tests.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go
index 59fa5fc17..bf24efa7e 100644
--- a/sdk/go/arvados/fs_site_test.go
+++ b/sdk/go/arvados/fs_site_test.go
@@ -10,6 +10,7 @@ import (
"io/ioutil"
"net/http"
"os"
+ "strings"
"syscall"
"time"
@@ -291,40 +292,41 @@ func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) {
c.Check(string(buf), check.Equals, string(thisfile))
}
- // Cannot splice a file onto a collection root, or anywhere
- // outside a collection
+ // Cannot splice a file onto a collection root; cannot splice
+ // anything to a target outside a collection.
for _, badpath := range []string{
+ dstPath + "/",
dstPath,
+ "/home/A Project/newnodename/",
"/home/A Project/newnodename",
+ "/home/A Project/",
"/home/A Project",
+ "/home/newnodename/",
"/home/newnodename",
+ "/home/",
"/home",
+ "/newnodename/",
"/newnodename",
+ "/",
} {
err = Splice(s.fs, badpath, snapFile)
c.Check(err, check.NotNil)
- c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %s"))
- if badpath == dstPath {
- c.Check(err, check.ErrorMatches, `cannot use Splice to attach a file at top level of \*arvados.collectionFileSystem: invalid operation`, check.Commentf("badpath: %s", badpath))
+ if strings.Contains(badpath, "newnodename") && strings.HasSuffix(badpath, "/") {
+ c.Check(err, ErrorIs, os.ErrNotExist, check.Commentf("badpath %q", badpath))
+ } else {
+ c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %q", badpath))
+ }
+ if strings.TrimSuffix(badpath, "/") == dstPath {
+ c.Check(err, check.ErrorMatches, `cannot use Splice to attach a file at top level of \*arvados.collectionFileSystem: invalid operation`, check.Commentf("badpath: %q", badpath))
continue
}
- err = Splice(s.fs, badpath, snap1)
- c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %s"))
- }
- // Destination cannot have trailing slash
- for _, badpath := range []string{
- dstPath + "/ctxlog/",
- dstPath + "/",
- "/home/A Project/",
- "/home/",
- "/",
- "",
- } {
err = Splice(s.fs, badpath, snap1)
- c.Check(err, ErrorIs, ErrInvalidArgument, check.Commentf("badpath %s", badpath))
- err = Splice(s.fs, badpath, snapFile)
- c.Check(err, ErrorIs, ErrInvalidArgument, check.Commentf("badpath %s", badpath))
+ if strings.Contains(badpath, "newnodename") && strings.HasSuffix(badpath, "/") {
+ c.Check(err, ErrorIs, os.ErrNotExist, check.Commentf("badpath %q", badpath))
+ } else {
+ c.Check(err, ErrorIs, ErrInvalidOperation, check.Commentf("badpath %q", badpath))
+ }
}
// Destination's parent must already exist
@@ -340,9 +342,10 @@ func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) {
}
snap2, err := Snapshot(s.fs, dstPath+"/ctxlog-copy")
- c.Check(err, check.IsNil)
- err = Splice(s.fs, dstPath+"/ctxlog-copy-copy", snap2)
- c.Check(err, check.IsNil)
+ if c.Check(err, check.IsNil) {
+ err = Splice(s.fs, dstPath+"/ctxlog-copy-copy", snap2)
+ c.Check(err, check.IsNil)
+ }
// Snapshot entire collection, splice into same collection at
// a new path, remove file from original location, verify
@@ -362,9 +365,10 @@ func (s *SiteFSSuite) TestSnapshotSplice(c *check.C) {
_, err = s.fs.Open(dstPath + "/arvados/fs_site_test.go")
c.Check(err, check.Equals, os.ErrNotExist)
f, err = s.fs.Open(dstPath + "/copy2/arvados/fs_site_test.go")
- c.Check(err, check.IsNil)
- defer f.Close()
- buf, err := ioutil.ReadAll(f)
- c.Check(err, check.IsNil)
- c.Check(string(buf), check.Equals, string(thisfile))
+ if c.Check(err, check.IsNil) {
+ defer f.Close()
+ buf, err := ioutil.ReadAll(f)
+ c.Check(err, check.IsNil)
+ c.Check(string(buf), check.Equals, string(thisfile))
+ }
}
commit 441a934dd0e98d622cd1ea15054c4f5b4198f2be
Author: Tom Clegg <tom at curii.com>
Date: Mon Mar 7 14:59:32 2022 -0500
18600: Add more invalid splice request tests.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/localdb/collection.go b/lib/controller/localdb/collection.go
index 965b009f4..2283b2fcb 100644
--- a/lib/controller/localdb/collection.go
+++ b/lib/controller/localdb/collection.go
@@ -238,7 +238,7 @@ func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[s
srcspec := strings.SplitN(src, "/", 2)
srcid, srcpath := srcspec[0], "/"
if !arvadosclient.PDHMatch(srcid) {
- return httpserver.Errorf(http.StatusBadRequest, "invalid source %q for splices[%q]: must be \"\" or \"PDH[/path]\"", src, dst)
+ return httpserver.Errorf(http.StatusBadRequest, "invalid source %q for splices[%q]: must be \"\" or \"PDH\" or \"PDH/path\"", src, dst)
}
if len(srcspec) == 2 && srcspec[1] != "" {
srcpath = srcspec[1]
diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go
index 27cdf3899..e71ae35c2 100644
--- a/lib/controller/localdb/collection_test.go
+++ b/lib/controller/localdb/collection_test.go
@@ -219,6 +219,7 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
c.Check(dstcopy.PortableDataHash, check.Equals, dst.PortableDataHash)
s.expectFiles(c, dstcopy, "b/corge.txt")
+ // Check invalid targets, sources, and combinations
for _, splices := range []map[string]string{
{
"/foo/nope": dst.PortableDataHash + "/b",
@@ -256,9 +257,14 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
c.Logf("splices %#v\n... got err: %s", splices, err)
c.Check(err, check.NotNil)
}
+
+ // Check "splices" value that isn't even the right type
for _, splices := range []interface{}{
map[string]int{"foo": 1},
map[int]string{1: "foo"},
+ 12345,
+ "foo",
+ []string{"foo"},
} {
_, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
UUID: dst.UUID,
@@ -266,8 +272,18 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
"splices": splices,
}})
c.Logf("splices %#v\n... got err: %s", splices, err)
- c.Check(err, check.NotNil)
+ c.Check(err, check.ErrorMatches, "invalid type .* for splices parameter")
}
+
+ // Check conflicting splices and manifest_text
+ _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+ UUID: dst.UUID,
+ Attrs: map[string]interface{}{
+ "splices": map[string]string{"/": ""},
+ "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:z\n",
+ }})
+ c.Logf("splices+manifest_text\n... got err: %s", err)
+ c.Check(err, check.ErrorMatches, "ambiguous request: both.*splices.*manifest_text.*")
}
// expectFiles checks coll's directory structure against the given
commit 831fac7cc24323bd48cdfd645d31153876516e55
Author: Tom Clegg <tom at curii.com>
Date: Mon Mar 7 14:10:52 2022 -0500
18600: Add API docs for "splices" feature.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/doc/api/methods/collections.html.textile.liquid b/doc/api/methods/collections.html.textile.liquid
index 01efda2b0..bea19bb75 100644
--- a/doc/api/methods/collections.html.textile.liquid
+++ b/doc/api/methods/collections.html.textile.liquid
@@ -47,7 +47,7 @@ table(table table-bordered table-condensed).
h3. Conditions of creating a Collection
-The @portable_data_hash@ and @manifest_text@ attributes must be provided when creating a Collection. The cryptographic digest of the supplied @manifest_text@ must match the supplied @portable_data_hash at .
+If a new @portable_data_hash@ is specified when creating or updating a Collection, it must match the cryptographic digest of the supplied @manifest_text at .
h3. Side effects of creating a Collection
@@ -73,6 +73,8 @@ table(table table-bordered table-condensed).
|_. Argument |_. Type |_. Description |_. Location |_. Example |
|collection|object||query||
+The new collection's content can be initialized by providing a @manifest_text@ or @splices@ key in the provided @collection@ object (see "splices":#splices below).
+
h3. delete
Put a Collection in the trash. This sets the @trash_at@ field to @now@ and @delete_at@ field to @now@ + token TTL. A trashed collection is invisible to most API calls unless the @include_trash@ parameter is true.
@@ -117,6 +119,8 @@ table(table table-bordered table-condensed).
{background:#ccffcc}.|uuid|string|The UUID of the Collection in question.|path||
|collection|object||query||
+The collection's content can be updated by providing a @manifest_text@ or @splices@ key in the provided @collection@ object (see "splices":#splices below).
+
h3. untrash
Remove a Collection from the trash. This sets the @trash_at@ and @delete_at@ fields to @null at .
@@ -160,3 +164,66 @@ Arguments:
table(table table-bordered table-condensed).
|_. Argument |_. Type |_. Description |_. Location |_. Example |
{background:#ccffcc}.|uuid|string|The UUID of the Collection to get usage.|path||
+
+h2(#splices). Using "splices" to create/update collections
+
+The @splices@ attribute can be used with the @create@ and @update@ APIs to efficiently copy individual files and directory trees from other collections, and copy/rename/delete items within an existing collection, without transferring any file data.
+
+ at splices@ keys indicate target paths in the new collection, and values specify sources that should be copied to the target paths.
+* Each target path must be an absolute canonical path beginning with @/@. It must not contain @.@ or @..@ components, consecutive @/@ characters, or a trailing @/@ after the final component.
+* Each source must be either an empty string (signifying that the target path is to be deleted), or @PDH/path@ where @PDH@ is the portable data hash of a collection on the cluster and @/path@ is a file or directory in that collection.
+* In an @update@ request, sources may reference the current portable data hash of the collection being updated.
+
+Example: delete @foo.txt@ from a collection
+
+<notextile><pre>
+"collection": {
+ "splices": {
+ "/foo.txt": ""
+ }
+}
+</pre></notextile>
+
+Example: rename @foo.txt@ to @bar.txt@ in a collection with portable data hash @fa7aeb5140e2848d39b416daeef4ffc5+45@
+
+<notextile><pre>
+"collection": {
+ "splices": {
+ "/foo.txt": "",
+ "/bar.txt": "fa7aeb5140e2848d39b416daeef4ffc5+45/foo.txt"
+ }
+}
+</pre></notextile>
+
+Example: delete current contents, then add content from multiple collections
+
+<notextile><pre>
+"collection": {
+ "splices": {
+ "/": "",
+ "/copy of collection 1": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/",
+ "/copy of collection 2": "ea10d51bcf88862dbcc36eb292017dfd+45/"
+ }
+}
+</pre></notextile>
+
+Example: replace entire collection with a copy of a subdirectory from another collection
+
+<notextile><pre>
+"collection": {
+ "splices": {
+ "/": "1f4b0bc7583c2a7f9102c395f4ffc5e3+45/subdir"
+ }
+}
+</pre></notextile>
+
+A target path with a non-empty source cannot be the ancestor of another target path in the same request. For example, the following request is invalid:
+
+<notextile><pre>
+"collection": {
+ "splices": {
+ "/foo": "fa7aeb5140e2848d39b416daeef4ffc5+45/",
+ "/foo/this_will_return_an_error": ""
+ }
+}
+</pre></notextile>
commit b031aee599d3ce1ad5a2880c32a2197d7cc66bdf
Author: Tom Clegg <tom at curii.com>
Date: Mon Mar 7 13:53:01 2022 -0500
18600: Implement collection-update API.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/localdb/collection.go b/lib/controller/localdb/collection.go
index 96c89252e..965b009f4 100644
--- a/lib/controller/localdb/collection.go
+++ b/lib/controller/localdb/collection.go
@@ -6,10 +6,17 @@ package localdb
import (
"context"
+ "fmt"
+ "net/http"
+ "os"
+ "sort"
+ "strings"
"time"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/auth"
+ "git.arvados.org/arvados.git/sdk/go/httpserver"
)
// CollectionGet defers to railsProxy for everything except blob
@@ -61,6 +68,9 @@ func (conn *Conn) CollectionCreate(ctx context.Context, opts arvados.CreateOptio
// them.
opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
}
+ if err := conn.applySplices(ctx, "", opts.Attrs); err != nil {
+ return arvados.Collection{}, err
+ }
resp, err := conn.railsProxy.CollectionCreate(ctx, opts)
if err != nil {
return resp, err
@@ -82,6 +92,9 @@ func (conn *Conn) CollectionUpdate(ctx context.Context, opts arvados.UpdateOptio
// them.
opts.Select = append([]string{"is_trashed", "trash_at"}, opts.Select...)
}
+ if err := conn.applySplices(ctx, opts.UUID, opts.Attrs); err != nil {
+ return arvados.Collection{}, err
+ }
resp, err := conn.railsProxy.CollectionUpdate(ctx, opts)
if err != nil {
return resp, err
@@ -108,3 +121,167 @@ func (conn *Conn) signCollection(ctx context.Context, coll *arvados.Collection)
}
coll.ManifestText = arvados.SignManifest(coll.ManifestText, token, exp, ttl, []byte(conn.cluster.Collections.BlobSigningKey))
}
+
+// If attrs["splices"] is present, populate attrs["manifest_text"] by
+// starting with the content of fromUUID (or an empty collection if
+// fromUUID is empty) and applying the specified splice operations.
+func (conn *Conn) applySplices(ctx context.Context, fromUUID string, attrs map[string]interface{}) error {
+ var splices map[string]string
+
+ // Validate the incoming attrs, and return early if the
+ // request doesn't ask for any splices.
+ if sp, ok := attrs["splices"]; !ok {
+ return nil
+ } else {
+ switch sp := sp.(type) {
+ default:
+ return httpserver.Errorf(http.StatusBadRequest, "invalid type %T for splices parameter", sp)
+ case nil:
+ return nil
+ case map[string]string:
+ splices = sp
+ case map[string]interface{}:
+ splices = make(map[string]string, len(sp))
+ for dst, src := range sp {
+ if src, ok := src.(string); ok {
+ splices[dst] = src
+ } else {
+ return httpserver.Errorf(http.StatusBadRequest, "invalid source type for splice target %q: %v", dst, src)
+ }
+ }
+ }
+ if len(splices) == 0 {
+ return nil
+ } else if mtxt, ok := attrs["manifest_text"].(string); ok && len(mtxt) > 0 {
+ return httpserver.Errorf(http.StatusBadRequest, "ambiguous request: both 'splices' and 'manifest_text' values provided")
+ }
+ }
+
+ // Load the current collection (if any) and set up an
+ // in-memory filesystem.
+ var dst arvados.Collection
+ if _, rootsplice := splices["/"]; !rootsplice && fromUUID != "" {
+ src, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: fromUUID})
+ if err != nil {
+ return err
+ }
+ dst = src
+ }
+ dstfs, err := dst.FileSystem(&arvados.StubClient{}, &arvados.StubClient{})
+ if err != nil {
+ return err
+ }
+
+ // Sort splices by source collection to avoid redundant
+ // reloads when a source collection is used more than
+ // once. Note empty sources (which mean "delete target path")
+ // sort first.
+ dstTodo := make([]string, 0, len(splices))
+ {
+ srcid := make(map[string]string, len(splices))
+ for dst, src := range splices {
+ dstTodo = append(dstTodo, dst)
+ if i := strings.IndexRune(src, '/'); i > 0 {
+ srcid[dst] = src[:i]
+ }
+ }
+ sort.Slice(dstTodo, func(i, j int) bool {
+ return srcid[dstTodo[i]] < srcid[dstTodo[j]]
+ })
+ }
+
+ // Reject attempt to splice a node as well as its descendant
+ // (e.g., a/ and a/b/), which is unsupported, except where the
+ // source for a/ is empty (i.e., delete).
+ for _, dst := range dstTodo {
+ if dst != "/" && (strings.HasSuffix(dst, "/") ||
+ strings.HasSuffix(dst, "/.") ||
+ strings.HasSuffix(dst, "/..") ||
+ strings.Contains(dst, "//") ||
+ strings.Contains(dst, "/./") ||
+ strings.Contains(dst, "/../") ||
+ !strings.HasPrefix(dst, "/")) {
+ return httpserver.Errorf(http.StatusBadRequest, "invalid splice target: %q", dst)
+ }
+ for i := 0; i < len(dst)-1; i++ {
+ if dst[i] != '/' {
+ continue
+ }
+ outerdst := dst[:i]
+ if outerdst == "" {
+ outerdst = "/"
+ }
+ if outersrc := splices[outerdst]; outersrc != "" {
+ return httpserver.Errorf(http.StatusBadRequest, "cannot splice at target %q with non-empty splice at %q", dst, outerdst)
+ }
+ }
+ }
+
+ var srcidloaded string
+ var srcfs arvados.FileSystem
+ // Apply the requested splices.
+ for _, dst := range dstTodo {
+ src := splices[dst]
+ if src == "" {
+ if dst == "/" {
+ // In this case we started with a
+ // blank manifest, so there can't be
+ // anything to delete.
+ continue
+ }
+ err := dstfs.RemoveAll(dst)
+ if err != nil {
+ return fmt.Errorf("RemoveAll(%s): %w", dst, err)
+ }
+ continue
+ }
+ srcspec := strings.SplitN(src, "/", 2)
+ srcid, srcpath := srcspec[0], "/"
+ if !arvadosclient.PDHMatch(srcid) {
+ return httpserver.Errorf(http.StatusBadRequest, "invalid source %q for splices[%q]: must be \"\" or \"PDH[/path]\"", src, dst)
+ }
+ if len(srcspec) == 2 && srcspec[1] != "" {
+ srcpath = srcspec[1]
+ }
+ if srcidloaded != srcid {
+ srcfs = nil
+ srccoll, err := conn.CollectionGet(ctx, arvados.GetOptions{UUID: srcid})
+ if err != nil {
+ return err
+ }
+ // We use StubClient here because we don't
+ // want srcfs to read/write any file data or
+ // sync collection state to/from the database.
+ srcfs, err = srccoll.FileSystem(&arvados.StubClient{}, &arvados.StubClient{})
+ if err != nil {
+ return err
+ }
+ srcidloaded = srcid
+ }
+ snap, err := arvados.Snapshot(srcfs, srcpath)
+ if err != nil {
+ return httpserver.Errorf(http.StatusBadRequest, "error getting snapshot of %q from %q: %w", srcpath, srcid, err)
+ }
+ // Create intermediate dirs, in case dst is
+ // "newdir1/newdir2/dst".
+ for i := 1; i < len(dst)-1; i++ {
+ if dst[i] == '/' {
+ err = dstfs.Mkdir(dst[:i], 0777)
+ if err != nil && !os.IsExist(err) {
+ return httpserver.Errorf(http.StatusBadRequest, "error creating parent dirs for %q: %w", dst, err)
+ }
+ }
+ }
+ err = arvados.Splice(dstfs, dst, snap)
+ if err != nil {
+ return fmt.Errorf("error splicing snapshot onto path %q: %w", dst, err)
+ }
+ }
+ mtxt, err := dstfs.MarshalManifest(".")
+ if err != nil {
+ return err
+ }
+ delete(attrs, "splices")
+ attrs["manifest_text"] = mtxt
+ return nil
+}
diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go
index 36108b435..27cdf3899 100644
--- a/lib/controller/localdb/collection_test.go
+++ b/lib/controller/localdb/collection_test.go
@@ -7,9 +7,11 @@ package localdb
import (
"context"
"io/fs"
+ "path/filepath"
"regexp"
"sort"
"strconv"
+ "strings"
"time"
"git.arvados.org/arvados.git/lib/config"
@@ -175,30 +177,102 @@ func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
c.Assert(err, check.IsNil)
s.expectFiles(c, dst, "b/baz.txt", "q/", "w/qux.txt")
- // Move content within collection
+ // Move and copy content within collection
dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
UUID: dst.UUID,
Attrs: map[string]interface{}{
"splices": map[string]string{
+ // Note splicing content to
+ // /b/corge.txt but removing
+ // everything else from /b
"/b": "",
+ "/b/corge.txt": dst.PortableDataHash + "/b/baz.txt",
"/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt",
},
}})
c.Assert(err, check.IsNil)
- s.expectFiles(c, dst, "q/", "w/qux.txt", "quux/corge.txt")
-}
+ s.expectFiles(c, dst, "b/corge.txt", "q/", "w/qux.txt", "quux/corge.txt")
-// Wrap arvados.FileSystem to satisfy the fs.FS interface (until the
-// SDK offers a neater solution) so we can use fs.WalkDir().
-type filesystemfs struct {
- arvados.FileSystem
-}
+ // Remove everything except one file
+ dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+ UUID: dst.UUID,
+ Attrs: map[string]interface{}{
+ "splices": map[string]string{
+ "/": "",
+ "/b/corge.txt": dst.PortableDataHash + "/b/corge.txt",
+ },
+ }})
+ c.Assert(err, check.IsNil)
+ s.expectFiles(c, dst, "b/corge.txt")
-func (fs filesystemfs) Open(path string) (fs.File, error) {
- f, err := fs.FileSystem.Open(path)
- return f, err
+ // Copy entire collection to root
+ dstcopy, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{
+ Attrs: map[string]interface{}{
+ // Note map[string]interface{} here, which is
+ // how lib/controller/router requests will
+ // look.
+ "splices": map[string]interface{}{
+ "/": dst.PortableDataHash,
+ },
+ }})
+ c.Check(err, check.IsNil)
+ c.Check(dstcopy.PortableDataHash, check.Equals, dst.PortableDataHash)
+ s.expectFiles(c, dstcopy, "b/corge.txt")
+
+ for _, splices := range []map[string]string{
+ {
+ "/foo/nope": dst.PortableDataHash + "/b",
+ "/foo": dst.PortableDataHash + "/b",
+ },
+ {
+ "/foo": dst.PortableDataHash + "/b",
+ "/foo/nope": "",
+ },
+ {
+ "/": dst.PortableDataHash + "/",
+ "/nope": "",
+ },
+ {
+ "/": dst.PortableDataHash + "/",
+ "/nope": dst.PortableDataHash + "/b",
+ },
+ {"/bad/": ""},
+ {"/./bad": ""},
+ {"/b/./ad": ""},
+ {"/b/../ad": ""},
+ {"/b/.": ""},
+ {".": ""},
+ {"bad": ""},
+ {"": ""},
+ {"/bad": "/b"},
+ {"/bad": "bad/b"},
+ {"/bad": dst.UUID + "/b"},
+ } {
+ _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+ UUID: dst.UUID,
+ Attrs: map[string]interface{}{
+ "splices": splices,
+ }})
+ c.Logf("splices %#v\n... got err: %s", splices, err)
+ c.Check(err, check.NotNil)
+ }
+ for _, splices := range []interface{}{
+ map[string]int{"foo": 1},
+ map[int]string{1: "foo"},
+ } {
+ _, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+ UUID: dst.UUID,
+ Attrs: map[string]interface{}{
+ "splices": splices,
+ }})
+ c.Logf("splices %#v\n... got err: %s", splices, err)
+ c.Check(err, check.NotNil)
+ }
}
+// expectFiles checks coll's directory structure against the given
+// list of expected files and empty directories. An expected path with
+// a trailing slash indicates an empty directory.
func (s *CollectionSuite) expectFiles(c *check.C, coll arvados.Collection, expected ...string) {
client := arvados.NewClientFromEnv()
ac, err := arvadosclient.New(client)
@@ -208,10 +282,32 @@ func (s *CollectionSuite) expectFiles(c *check.C, coll arvados.Collection, expec
cfs, err := coll.FileSystem(arvados.NewClientFromEnv(), kc)
c.Assert(err, check.IsNil)
var found []string
- fs.WalkDir(filesystemfs{cfs}, "/", func(path string, d fs.DirEntry, err error) error {
- found = append(found, path)
+ nonemptydirs := map[string]bool{}
+ fs.WalkDir(arvados.FS(cfs), "/", func(path string, d fs.DirEntry, err error) error {
+ dir, _ := filepath.Split(path)
+ nonemptydirs[dir] = true
+ if d.IsDir() {
+ if path != "/" {
+ path += "/"
+ }
+ if !nonemptydirs[path] {
+ nonemptydirs[path] = false
+ }
+ } else {
+ found = append(found, path)
+ }
return nil
})
+ for d, nonempty := range nonemptydirs {
+ if !nonempty {
+ found = append(found, d)
+ }
+ }
+ for i, path := range found {
+ if path != "/" {
+ found[i] = strings.TrimPrefix(path, "/")
+ }
+ }
sort.Strings(found)
sort.Strings(expected)
c.Check(found, check.DeepEquals, expected)
diff --git a/sdk/go/arvados/fs_backend.go b/sdk/go/arvados/fs_backend.go
index 445ac8103..cc4c32ffe 100644
--- a/sdk/go/arvados/fs_backend.go
+++ b/sdk/go/arvados/fs_backend.go
@@ -37,7 +37,7 @@ var errStubClient = errors.New("stub client")
type StubClient struct{}
func (*StubClient) ReadAt(string, []byte, int) (int, error) { return 0, errStubClient }
-func (*StubClient) LocalLocator(string) (string, error) { return "", errStubClient }
+func (*StubClient) LocalLocator(loc string) (string, error) { return loc, nil }
func (*StubClient) BlockWrite(context.Context, BlockWriteOptions) (BlockWriteResponse, error) {
return BlockWriteResponse{}, errStubClient
}
diff --git a/sdk/go/httpserver/error.go b/sdk/go/httpserver/error.go
index f1817d337..75ff85336 100644
--- a/sdk/go/httpserver/error.go
+++ b/sdk/go/httpserver/error.go
@@ -6,9 +6,14 @@ package httpserver
import (
"encoding/json"
+ "fmt"
"net/http"
)
+func Errorf(status int, tmpl string, args ...interface{}) error {
+ return errorWithStatus{fmt.Errorf(tmpl, args...), status}
+}
+
func ErrorWithStatus(err error, status int) error {
return errorWithStatus{err, status}
}
commit f40f1b051da0cc42167916409dfe5fcd9af62197
Author: Tom Clegg <tom at curii.com>
Date: Sun Mar 6 02:31:39 2022 -0500
18600: Fix Snapshot/Splice at collectionfs root.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go
index 387b6a737..bebb74261 100644
--- a/sdk/go/arvados/fs_base.go
+++ b/sdk/go/arvados/fs_base.go
@@ -463,14 +463,14 @@ func (fs *fileSystem) openFile(name string, flag int, perm os.FileMode) (*fileha
default:
return nil, fmt.Errorf("invalid flags 0x%x", flag)
}
- if !writable && parent.IsDir() {
+ if parent.IsDir() {
// A directory can be opened via "foo/", "foo/.", or
// "foo/..".
switch name {
case ".", "":
- return &filehandle{inode: parent}, nil
+ return &filehandle{inode: parent, readable: readable, writable: writable}, nil
case "..":
- return &filehandle{inode: parent.Parent()}, nil
+ return &filehandle{inode: parent.Parent(), readable: readable, writable: writable}, nil
}
}
createMode := flag&os.O_CREATE != 0
@@ -766,7 +766,7 @@ func Splice(fs FileSystem, target string, newsubtree *Subtree) error {
f, err = fs.OpenFile(target, os.O_CREATE|os.O_WRONLY, 0700)
}
if err != nil {
- return err
+ return fmt.Errorf("open %s: %w", target, err)
}
defer f.Close()
return f.Splice(newsubtree)
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 0c5819721..f4dae746e 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -1565,7 +1565,7 @@ func (dn *dirnode) snapshot() (*dirnode, error) {
func (dn *dirnode) Splice(repl inode) error {
repl, err := repl.Snapshot()
if err != nil {
- return err
+ return fmt.Errorf("cannot copy snapshot: %w", err)
}
switch repl := repl.(type) {
default:
@@ -1599,7 +1599,7 @@ func (dn *dirnode) Splice(repl inode) error {
defer dn.Unlock()
_, err = dn.parent.Child(dn.fileinfo.name, func(inode) (inode, error) { return repl, nil })
if err != nil {
- return err
+ return fmt.Errorf("error replacing filenode: dn.parent.Child(): %w", err)
}
repl.fs = dn.fs
}
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index fab91d1f7..b221aaa08 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -1441,6 +1441,30 @@ func (s *CollectionFSSuite) TestEdgeCaseManifests(c *check.C) {
}
}
+func (s *CollectionFSSuite) TestSnapshotSplice(c *check.C) {
+ filedata1 := "hello snapshot+splice world\n"
+ fs, err := (&Collection{}).FileSystem(s.client, s.kc)
+ c.Assert(err, check.IsNil)
+ {
+ f, err := fs.OpenFile("file1", os.O_CREATE|os.O_RDWR, 0700)
+ c.Assert(err, check.IsNil)
+ _, err = f.Write([]byte(filedata1))
+ c.Assert(err, check.IsNil)
+ err = f.Close()
+ c.Assert(err, check.IsNil)
+ }
+
+ snap, err := Snapshot(fs, "/")
+ c.Assert(err, check.IsNil)
+ err = Splice(fs, "dir1", snap)
+ c.Assert(err, check.IsNil)
+ f, err := fs.Open("dir1/file1")
+ c.Assert(err, check.IsNil)
+ buf, err := io.ReadAll(f)
+ c.Assert(err, check.IsNil)
+ c.Check(string(buf), check.Equals, filedata1)
+}
+
func (s *CollectionFSSuite) TestRefreshSignatures(c *check.C) {
filedata1 := "hello refresh signatures world\n"
fs, err := (&Collection{}).FileSystem(s.client, s.kc)
commit 5af28d3da4e6414984ddcae7796495fe149a7988
Author: Tom Clegg <tom at curii.com>
Date: Sun Mar 6 02:32:28 2022 -0500
18600: Support fs.FS interface.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go
index 80b803729..387b6a737 100644
--- a/sdk/go/arvados/fs_base.go
+++ b/sdk/go/arvados/fs_base.go
@@ -8,6 +8,7 @@ import (
"errors"
"fmt"
"io"
+ "io/fs"
"log"
"net/http"
"os"
@@ -159,6 +160,18 @@ type FileSystem interface {
MemorySize() int64
}
+type fsFS struct {
+ FileSystem
+}
+
+// FS returns an fs.FS interface to the given FileSystem, to enable
+// the use of fs.WalkDir, etc.
+func FS(fs FileSystem) fs.FS { return fsFS{fs} }
+func (fs fsFS) Open(path string) (fs.File, error) {
+ f, err := fs.FileSystem.Open(path)
+ return f, err
+}
+
type inode interface {
SetParent(parent inode, name string)
Parent() inode
diff --git a/sdk/go/arvados/fs_filehandle.go b/sdk/go/arvados/fs_filehandle.go
index 4530a7b06..f50dd4612 100644
--- a/sdk/go/arvados/fs_filehandle.go
+++ b/sdk/go/arvados/fs_filehandle.go
@@ -6,6 +6,7 @@ package arvados
import (
"io"
+ "io/fs"
"os"
)
@@ -73,6 +74,31 @@ func (f *filehandle) Write(p []byte) (n int, err error) {
return
}
+// dirEntry implements fs.DirEntry, see (*filehandle)ReadDir().
+type dirEntry struct {
+ os.FileInfo
+}
+
+func (ent dirEntry) Type() fs.FileMode {
+ return ent.Mode().Type()
+}
+func (ent dirEntry) Info() (fs.FileInfo, error) {
+ return ent, nil
+}
+
+// ReadDir implements fs.ReadDirFile.
+func (f *filehandle) ReadDir(count int) ([]fs.DirEntry, error) {
+ fis, err := f.Readdir(count)
+ if len(fis) == 0 {
+ return nil, err
+ }
+ ents := make([]fs.DirEntry, len(fis))
+ for i, fi := range fis {
+ ents[i] = dirEntry{fi}
+ }
+ return ents, err
+}
+
func (f *filehandle) Readdir(count int) ([]os.FileInfo, error) {
if !f.inode.IsDir() {
return nil, ErrInvalidOperation
commit 773cbc88601db3d7fcbb3d28a3a840747d8415b6
Author: Tom Clegg <tom at curii.com>
Date: Fri Mar 4 11:37:45 2022 -0500
18600: Add update-files test.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go
index efcc830f1..36108b435 100644
--- a/lib/controller/localdb/collection_test.go
+++ b/lib/controller/localdb/collection_test.go
@@ -6,16 +6,20 @@ package localdb
import (
"context"
+ "io/fs"
"regexp"
+ "sort"
"strconv"
"time"
"git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/lib/controller/rpc"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "git.arvados.org/arvados.git/sdk/go/keepclient"
check "gopkg.in/check.v1"
)
@@ -119,6 +123,100 @@ func (s *CollectionSuite) TestCollectionCreateAndUpdateWithProperties(c *check.C
}
}
+func (s *CollectionSuite) TestCollectionUpdateFiles(c *check.C) {
+ ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.AdminToken}})
+ foo, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{
+ Attrs: map[string]interface{}{
+ "owner_uuid": arvadostest.ActiveUserUUID,
+ "manifest_text": ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo.txt\n",
+ }})
+ c.Assert(err, check.IsNil)
+ s.localdb.signCollection(ctx, &foo)
+ foobarbaz, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{
+ Attrs: map[string]interface{}{
+ "owner_uuid": arvadostest.ActiveUserUUID,
+ "manifest_text": "./foo/bar 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz.txt\n",
+ }})
+ c.Assert(err, check.IsNil)
+ s.localdb.signCollection(ctx, &foobarbaz)
+ wazqux, err := s.localdb.railsProxy.CollectionCreate(ctx, arvados.CreateOptions{
+ Attrs: map[string]interface{}{
+ "owner_uuid": arvadostest.ActiveUserUUID,
+ "manifest_text": "./waz d85b1213473c2fd7c2045020a6b9c62b+3 0:3:qux.txt\n",
+ }})
+ c.Assert(err, check.IsNil)
+ s.localdb.signCollection(ctx, &wazqux)
+
+ ctx = auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
+
+ // Create using content from existing collections
+ dst, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{
+ Attrs: map[string]interface{}{
+ "owner_uuid": arvadostest.ActiveUserUUID,
+ "splices": map[string]string{
+ "/f": foo.PortableDataHash + "/foo.txt",
+ "/b": foobarbaz.PortableDataHash + "/foo/bar",
+ "/q": wazqux.PortableDataHash + "/",
+ "/w": wazqux.PortableDataHash + "/waz",
+ },
+ }})
+ c.Assert(err, check.IsNil)
+ s.expectFiles(c, dst, "f", "b/baz.txt", "q/waz/qux.txt", "w/qux.txt")
+
+ // Delete a file and a directory
+ dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+ UUID: dst.UUID,
+ Attrs: map[string]interface{}{
+ "splices": map[string]string{
+ "/f": "",
+ "/q/waz": "",
+ },
+ }})
+ c.Assert(err, check.IsNil)
+ s.expectFiles(c, dst, "b/baz.txt", "q/", "w/qux.txt")
+
+ // Move content within collection
+ dst, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
+ UUID: dst.UUID,
+ Attrs: map[string]interface{}{
+ "splices": map[string]string{
+ "/b": "",
+ "/quux/corge.txt": dst.PortableDataHash + "/b/baz.txt",
+ },
+ }})
+ c.Assert(err, check.IsNil)
+ s.expectFiles(c, dst, "q/", "w/qux.txt", "quux/corge.txt")
+}
+
+// Wrap arvados.FileSystem to satisfy the fs.FS interface (until the
+// SDK offers a neater solution) so we can use fs.WalkDir().
+type filesystemfs struct {
+ arvados.FileSystem
+}
+
+func (fs filesystemfs) Open(path string) (fs.File, error) {
+ f, err := fs.FileSystem.Open(path)
+ return f, err
+}
+
+func (s *CollectionSuite) expectFiles(c *check.C, coll arvados.Collection, expected ...string) {
+ client := arvados.NewClientFromEnv()
+ ac, err := arvadosclient.New(client)
+ c.Assert(err, check.IsNil)
+ kc, err := keepclient.MakeKeepClient(ac)
+ c.Assert(err, check.IsNil)
+ cfs, err := coll.FileSystem(arvados.NewClientFromEnv(), kc)
+ c.Assert(err, check.IsNil)
+ var found []string
+ fs.WalkDir(filesystemfs{cfs}, "/", func(path string, d fs.DirEntry, err error) error {
+ found = append(found, path)
+ return nil
+ })
+ sort.Strings(found)
+ sort.Strings(expected)
+ c.Check(found, check.DeepEquals, expected)
+}
+
func (s *CollectionSuite) TestSignatures(c *check.C) {
ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
diff --git a/sdk/go/arvados/fs_backend.go b/sdk/go/arvados/fs_backend.go
index 32365a531..445ac8103 100644
--- a/sdk/go/arvados/fs_backend.go
+++ b/sdk/go/arvados/fs_backend.go
@@ -6,6 +6,7 @@ package arvados
import (
"context"
+ "errors"
"io"
)
@@ -30,3 +31,16 @@ type keepClient interface {
type apiClient interface {
RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error
}
+
+var errStubClient = errors.New("stub client")
+
+type StubClient struct{}
+
+func (*StubClient) ReadAt(string, []byte, int) (int, error) { return 0, errStubClient }
+func (*StubClient) LocalLocator(string) (string, error) { return "", errStubClient }
+func (*StubClient) BlockWrite(context.Context, BlockWriteOptions) (BlockWriteResponse, error) {
+ return BlockWriteResponse{}, errStubClient
+}
+func (*StubClient) RequestAndDecode(_ interface{}, _, _ string, _ io.Reader, _ interface{}) error {
+ return errStubClient
+}
commit 9a966d7d7e4cc104037534f0d4e65c81961eb7a1
Author: Tom Clegg <tom at curii.com>
Date: Thu Mar 3 16:19:19 2022 -0500
18600: De-duplicate test code.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/controller/localdb/collection_test.go b/lib/controller/localdb/collection_test.go
index bbfb81116..efcc830f1 100644
--- a/lib/controller/localdb/collection_test.go
+++ b/lib/controller/localdb/collection_test.go
@@ -71,7 +71,7 @@ func (s *CollectionSuite) setUpVocabulary(c *check.C, testVocabulary string) {
s.localdb.vocabularyCache = voc
}
-func (s *CollectionSuite) TestCollectionCreateWithProperties(c *check.C) {
+func (s *CollectionSuite) TestCollectionCreateAndUpdateWithProperties(c *check.C) {
s.setUpVocabulary(c, "")
ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
@@ -88,6 +88,7 @@ func (s *CollectionSuite) TestCollectionCreateWithProperties(c *check.C) {
for _, tt := range tests {
c.Log(c.TestName()+" ", tt.name)
+ // Create with properties
coll, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{
Select: []string{"uuid", "properties"},
Attrs: map[string]interface{}{
@@ -99,26 +100,9 @@ func (s *CollectionSuite) TestCollectionCreateWithProperties(c *check.C) {
} else {
c.Assert(err, check.NotNil)
}
- }
-}
-
-func (s *CollectionSuite) TestCollectionUpdateWithProperties(c *check.C) {
- s.setUpVocabulary(c, "")
- ctx := auth.NewContext(context.Background(), &auth.Credentials{Tokens: []string{arvadostest.ActiveTokenV2}})
- tests := []struct {
- name string
- props map[string]interface{}
- success bool
- }{
- {"Invalid prop key", map[string]interface{}{"Priority": "IDVALIMPORTANCES1"}, false},
- {"Invalid prop value", map[string]interface{}{"IDTAGIMPORTANCES": "high"}, false},
- {"Valid prop key & value", map[string]interface{}{"IDTAGIMPORTANCES": "IDVALIMPORTANCES1"}, true},
- {"Empty properties", map[string]interface{}{}, true},
- }
- for _, tt := range tests {
- c.Log(c.TestName()+" ", tt.name)
- coll, err := s.localdb.CollectionCreate(ctx, arvados.CreateOptions{})
+ // Create, then update with properties
+ coll, err = s.localdb.CollectionCreate(ctx, arvados.CreateOptions{})
c.Assert(err, check.IsNil)
coll, err = s.localdb.CollectionUpdate(ctx, arvados.UpdateOptions{
UUID: coll.UUID,
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list