[arvados] updated: 2.1.0-2620-gdd8f1b052

git repository hosting git at public.arvados.org
Mon Jun 27 19:07:30 UTC 2022


Summary of changes:
 doc/api/keep-s3.html.textile.liquid | 16 +++++++++++
 sdk/go/arvados/fs_base.go           | 16 ++++++-----
 sdk/go/arvados/fs_collection.go     |  2 +-
 sdk/go/arvados/fs_deferred.go       |  2 +-
 sdk/go/arvados/fs_project.go        |  9 ++++--
 sdk/go/arvados/fs_site.go           | 20 +++++++++----
 services/keep-web/s3.go             | 41 +++++++++++++++++++++------
 services/keep-web/s3_test.go        | 56 +++++++++++++++++++++++++++++--------
 8 files changed, 126 insertions(+), 36 deletions(-)

       via  dd8f1b0527995bc5ad47710d3a483fa18b827bc6 (commit)
       via  9568d12b9bb5db7f39c9627d0059c72cfc6f2eb1 (commit)
       via  b041a675c577e174680913e0da0bf69b1cca83b6 (commit)
      from  c502c5a50aae825683ee4cff629c6839a4209501 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit dd8f1b0527995bc5ad47710d3a483fa18b827bc6
Author: Tom Clegg <tom at curii.com>
Date:   Mon Jun 27 15:06:31 2022 -0400

    19088: Document S3 properties-as-metadata feature.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/doc/api/keep-s3.html.textile.liquid b/doc/api/keep-s3.html.textile.liquid
index bee91516b..2e0c90ef4 100644
--- a/doc/api/keep-s3.html.textile.liquid
+++ b/doc/api/keep-s3.html.textile.liquid
@@ -70,6 +70,22 @@ h4. GetBucketVersioning
 
 Bucket versioning is presently not supported, so this will always respond that bucket versioning is not enabled.
 
+h3. Accessing collection/project properties as metadata
+
+GetObject, HeadObject, and HeadBucket return Arvados object properties as S3 metadata headers, e.g., @X-Amz-Meta-Foo: bar@.
+
+If the requested path indicates a file or directory placeholder inside a collection, or the top level of a collection, GetObject and HeadObject return the collection properties.
+
+If the requested path indicates a directory placeholder corresponding to a project, GetObject and HeadObject return the properties of the project.
+
+HeadBucket returns the properties of the collection or project corresponding to the bucket name.
+
+Non-string property values are returned in a JSON representation, e.g., @["foo","bar"]@.
+
+As in Amazon S3, property values containing non-ASCII characters are returned in BASE64-encoded form as described in RFC 2047, e.g., @=?UTF-8?b?4pu1?=@.
+
+It is not possible to modify collection or project properties using the S3 API.
+
 h3. Authorization mechanisms
 
 Keep-web accepts AWS Signature Version 4 (AWS4-HMAC-SHA256) as well as the older V2 AWS signature.

commit 9568d12b9bb5db7f39c9627d0059c72cfc6f2eb1
Author: Tom Clegg <tom at curii.com>
Date:   Mon Jun 27 15:05:58 2022 -0400

    19088: base64-encode non-ascii property values in response headers.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
index d92828e06..90b75f8a3 100644
--- a/services/keep-web/s3.go
+++ b/services/keep-web/s3.go
@@ -14,6 +14,7 @@ import (
 	"fmt"
 	"hash"
 	"io"
+	"mime"
 	"net/http"
 	"net/textproto"
 	"net/url"
@@ -604,6 +605,14 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 }
 
 func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
+	maybeEncode := func(s string) string {
+		for _, c := range s {
+			if c > '\u007f' {
+				return mime.BEncoding.Encode("UTF-8", s)
+			}
+		}
+		return s
+	}
 	path = strings.TrimSuffix(path, "/")
 	var props map[string]interface{}
 	for {
@@ -636,9 +645,9 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st
 		}
 		k = "x-amz-meta-" + k
 		if s, ok := v.(string); ok {
-			header.Set(k, s)
+			header.Set(k, maybeEncode(s))
 		} else if j, err := json.Marshal(v); err == nil {
-			header.Set(k, string(j))
+			header.Set(k, maybeEncode(string(j)))
 		}
 	}
 	return nil
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
index b25ef972d..a99f3c278 100644
--- a/services/keep-web/s3_test.go
+++ b/services/keep-web/s3_test.go
@@ -11,6 +11,7 @@ import (
 	"crypto/sha256"
 	"fmt"
 	"io/ioutil"
+	"mime"
 	"net/http"
 	"net/http/httptest"
 	"net/url"
@@ -77,9 +78,10 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
 		"name":          "keep-web s3 test collection",
 		"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
 		"properties": map[string]interface{}{
-			"string": "string value",
-			"array":  []string{"element1", "element2"},
-			"object": map[string]interface{}{"key": map[string]interface{}{"key2": "value"}},
+			"string":   "string value",
+			"array":    []string{"element1", "element2"},
+			"object":   map[string]interface{}{"key": map[string]interface{}{"key2": "value⛵"}},
+			"nonascii": "⛵",
 		},
 	}})
 	c.Assert(err, check.IsNil)
@@ -252,9 +254,10 @@ func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
 	defer stage.teardown(c)
 
 	expectCollectionTags := map[string]string{
-		"String": "string value",
-		"Array":  `["element1","element2"]`,
-		"Object": `{"key":{"key2":"value"}}`,
+		"String":   "string value",
+		"Array":    `["element1","element2"]`,
+		"Object":   mime.BEncoding.Encode("UTF-8", `{"key":{"key2":"value⛵"}}`),
+		"Nonascii": "=?UTF-8?b?4pu1?=",
 	}
 	expectSubprojectTags := map[string]string{
 		"Subproject_properties_key": "subproject properties value",

commit b041a675c577e174680913e0da0bf69b1cca83b6
Author: Tom Clegg <tom at curii.com>
Date:   Mon Jun 27 14:19:45 2022 -0400

    19088: Export collection/project properties as bucket-level tags.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go
index 0cde825b3..2ad4d1f85 100644
--- a/sdk/go/arvados/fs_base.go
+++ b/sdk/go/arvados/fs_base.go
@@ -234,15 +234,14 @@ type fileinfo struct {
 	mode    os.FileMode
 	size    int64
 	modTime time.Time
-	// Source data structure: *Collection, *Group, or
-	// nil. Currently populated only for project dirs and
-	// top-level collection dirs; *not* populated for
-	// /by_id/{uuid} dirs (only subdirs below that). Does not stay
-	// up to date with upstream changes.
+	// If not nil, sys() returns the source data structure, which
+	// can be a *Collection, *Group, or nil. Currently populated
+	// only for project dirs and top-level collection dirs. Does
+	// not stay up to date with upstream changes.
 	//
 	// Intended to support keep-web's properties-as-s3-metadata
 	// feature (https://dev.arvados.org/issues/19088).
-	sys interface{}
+	sys func() interface{}
 }
 
 // Name implements os.FileInfo.
@@ -272,7 +271,10 @@ func (fi fileinfo) Size() int64 {
 
 // Sys implements os.FileInfo. See comment in fileinfo struct.
 func (fi fileinfo) Sys() interface{} {
-	return fi.sys
+	if fi.sys == nil {
+		return nil
+	}
+	return fi.sys()
 }
 
 type nullnode struct{}
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index d3af92f9e..26012e240 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -85,7 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
 				name:    ".",
 				mode:    os.ModeDir | 0755,
 				modTime: modTime,
-				sys:     c,
+				sys:     func() interface{} { return c },
 			},
 			inodes: make(map[string]inode),
 		},
diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go
index 07cf76003..1dfa2df6e 100644
--- a/sdk/go/arvados/fs_deferred.go
+++ b/sdk/go/arvados/fs_deferred.go
@@ -24,7 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
 			name:    coll.Name,
 			modTime: modTime,
 			mode:    0755 | os.ModeDir,
-			sys:     &coll,
+			sys:     func() interface{} { return &coll },
 		},
 	}
 	return &deferrednode{wrapped: placeholder, create: func() inode {
diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go
index 4db87a591..bea1f76e2 100644
--- a/sdk/go/arvados/fs_project.go
+++ b/sdk/go/arvados/fs_project.go
@@ -64,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
 	if strings.Contains(coll.UUID, "-j7d0g-") {
 		// Group item was loaded into a Collection var -- but
 		// we only need the Name and UUID anyway, so it's OK.
-		return fs.newProjectNode(parent, coll.Name, coll.UUID, coll.Properties), nil
+		return fs.newProjectNode(parent, coll.Name, coll.UUID, nil), nil
 	} else if strings.Contains(coll.UUID, "-4zz18-") {
 		return deferredCollectionFS(fs, parent, coll), nil
 	} else {
@@ -123,7 +123,12 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
 					continue
 				}
 				if strings.Contains(i.UUID, "-j7d0g-") {
-					inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, i.Properties))
+					inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, &Group{
+						UUID:       i.UUID,
+						Name:       i.Name,
+						ModifiedAt: i.ModifiedAt,
+						Properties: i.Properties,
+					}))
 				} else if strings.Contains(i.UUID, "-4zz18-") {
 					inodes = append(inodes, deferredCollectionFS(fs, parent, i))
 				} else {
diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go
index 0a561f667..bb2eee779 100644
--- a/sdk/go/arvados/fs_site.go
+++ b/sdk/go/arvados/fs_site.go
@@ -161,7 +161,8 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
 	return cfs
 }
 
-func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, properties map[string]interface{}) inode {
+func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proj *Group) inode {
+	var projLoading sync.Mutex
 	return &lookupnode{
 		stale:   fs.Stale,
 		loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
@@ -174,10 +175,19 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, proper
 				name:    name,
 				modTime: time.Now(),
 				mode:    0755 | os.ModeDir,
-				sys: &Group{
-					GroupClass: "project",
-					UUID:       uuid,
-					Properties: properties,
+				sys: func() interface{} {
+					projLoading.Lock()
+					defer projLoading.Unlock()
+					if proj != nil {
+						return proj
+					}
+					var g Group
+					err := fs.RequestAndDecode(&g, "GET", "arvados/v1/groups/"+uuid, nil, nil)
+					if err != nil {
+						return err
+					}
+					proj = &g
+					return proj
 				},
 			},
 		},
diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
index 4117dafbc..d92828e06 100644
--- a/services/keep-web/s3.go
+++ b/services/keep-web/s3.go
@@ -387,7 +387,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 		if r.Method == "HEAD" && !objectNameGiven {
 			// HeadBucket
 			if err == nil && fi.IsDir() {
-				setFileInfoHeaders(w.Header(), fs, fspath)
+				err = setFileInfoHeaders(w.Header(), fs, fspath)
+				if err != nil {
+					s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+					return true
+				}
 				w.WriteHeader(http.StatusOK)
 			} else if os.IsNotExist(err) {
 				s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -397,7 +401,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 			return true
 		}
 		if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
-			setFileInfoHeaders(w.Header(), fs, fspath)
+			err = setFileInfoHeaders(w.Header(), fs, fspath)
+			if err != nil {
+				s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+				return true
+			}
 			w.Header().Set("Content-Type", "application/x-directory")
 			w.WriteHeader(http.StatusOK)
 			return true
@@ -419,7 +427,11 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 		// shallow copy r, and change URL path
 		r := *r
 		r.URL.Path = fspath
-		setFileInfoHeaders(w.Header(), fs, fspath)
+		err = setFileInfoHeaders(w.Header(), fs, fspath)
+		if err != nil {
+			s3ErrorResponse(w, InternalError, err.Error(), r.URL.Path, http.StatusBadGateway)
+			return true
+		}
 		http.FileServer(fs).ServeHTTP(w, &r)
 		return true
 	case r.Method == http.MethodPut:
@@ -591,13 +603,13 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 	}
 }
 
-func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) {
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) error {
 	path = strings.TrimSuffix(path, "/")
 	var props map[string]interface{}
 	for {
 		fi, err := fs.Stat(path)
 		if err != nil {
-			return
+			return err
 		}
 		switch src := fi.Sys().(type) {
 		case *arvados.Collection:
@@ -605,10 +617,13 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st
 		case *arvados.Group:
 			props = src.Properties
 		default:
+			if err, ok := src.(error); ok {
+				return err
+			}
 			// Try parent
 			cut := strings.LastIndexByte(path, '/')
 			if cut < 0 {
-				return
+				return nil
 			}
 			path = path[:cut]
 			continue
@@ -626,6 +641,7 @@ func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path st
 			header.Set(k, string(j))
 		}
 	}
+	return nil
 }
 
 func validMIMEHeaderKey(k string) bool {
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
index a2e61e9b7..b25ef972d 100644
--- a/services/keep-web/s3_test.go
+++ b/services/keep-web/s3_test.go
@@ -53,6 +53,9 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
 		"group": map[string]interface{}{
 			"group_class": "project",
 			"name":        "keep-web s3 test",
+			"properties": map[string]interface{}{
+				"project-properties-key": "project properties value",
+			},
 		},
 		"ensure_unique_name": true,
 	})
@@ -234,9 +237,9 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix
 	c.Check(exists, check.Equals, true)
 }
 
-func (s *IntegrationSuite) checkMetaEquals(c *check.C, resp *http.Response, expect map[string]string) {
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, hdr http.Header, expect map[string]string) {
 	got := map[string]string{}
-	for hk, hv := range resp.Header {
+	for hk, hv := range hdr {
 		if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 {
 			got[k] = hv[0]
 		}
@@ -256,22 +259,48 @@ func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) {
 	expectSubprojectTags := map[string]string{
 		"Subproject_properties_key": "subproject properties value",
 	}
+	expectProjectTags := map[string]string{
+		"Project-Properties-Key": "project properties value",
+	}
 
+	c.Log("HEAD object with metadata from collection")
 	resp, err := stage.collbucket.Head("sailboat.txt", nil)
 	c.Assert(err, check.IsNil)
-	s.checkMetaEquals(c, resp, expectCollectionTags)
+	s.checkMetaEquals(c, resp.Header, expectCollectionTags)
+
+	c.Log("GET object with metadata from collection")
+	rdr, hdr, err := stage.collbucket.GetReaderWithHeaders("sailboat.txt")
+	c.Assert(err, check.IsNil)
+	content, err := ioutil.ReadAll(rdr)
+	c.Check(err, check.IsNil)
+	rdr.Close()
+	c.Check(content, check.HasLen, 4)
+	s.checkMetaEquals(c, hdr, expectCollectionTags)
+
+	c.Log("HEAD bucket with metadata from collection")
+	resp, err = stage.collbucket.Head("/", nil)
+	c.Assert(err, check.IsNil)
+	s.checkMetaEquals(c, resp.Header, expectCollectionTags)
 
+	c.Log("HEAD directory placeholder with metadata from collection")
 	resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil)
 	c.Assert(err, check.IsNil)
-	s.checkMetaEquals(c, resp, expectCollectionTags)
+	s.checkMetaEquals(c, resp.Header, expectCollectionTags)
 
+	c.Log("HEAD file with metadata from collection")
 	resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil)
 	c.Assert(err, check.IsNil)
-	s.checkMetaEquals(c, resp, expectCollectionTags)
+	s.checkMetaEquals(c, resp.Header, expectCollectionTags)
 
+	c.Log("HEAD directory placeholder with metadata from subproject")
 	resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil)
 	c.Assert(err, check.IsNil)
-	s.checkMetaEquals(c, resp, expectSubprojectTags)
+	s.checkMetaEquals(c, resp.Header, expectSubprojectTags)
+
+	c.Log("HEAD bucket with metadata from project")
+	resp, err = stage.projbucket.Head("/", nil)
+	c.Assert(err, check.IsNil)
+	s.checkMetaEquals(c, resp.Header, expectProjectTags)
 }
 
 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list