[arvados] created: 2.1.0-2617-gc502c5a50

git repository hosting git at public.arvados.org
Mon Jun 27 15:28:12 UTC 2022


        at  c502c5a50aae825683ee4cff629c6839a4209501 (commit)


commit c502c5a50aae825683ee4cff629c6839a4209501
Author: Tom Clegg <tom at curii.com>
Date:   Mon Jun 27 11:25:31 2022 -0400

    19088: Export collection/project properties as x-amz-meta tags.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go
index ce9253ab3..0cde825b3 100644
--- a/sdk/go/arvados/fs_base.go
+++ b/sdk/go/arvados/fs_base.go
@@ -234,6 +234,15 @@ type fileinfo struct {
 	mode    os.FileMode
 	size    int64
 	modTime time.Time
+	// Source data structure: *Collection, *Group, or
+	// nil. Currently populated only for project dirs and
+	// top-level collection dirs; *not* populated for
+	// /by_id/{uuid} dirs (only subdirs below that). Does not stay
+	// up to date with upstream changes.
+	//
+	// Intended to support keep-web's properties-as-s3-metadata
+	// feature (https://dev.arvados.org/issues/19088).
+	sys interface{}
 }
 
 // Name implements os.FileInfo.
@@ -261,9 +270,9 @@ func (fi fileinfo) Size() int64 {
 	return fi.size
 }
 
-// Sys implements os.FileInfo.
+// Sys implements os.FileInfo. See comment in fileinfo struct.
 func (fi fileinfo) Sys() interface{} {
-	return nil
+	return fi.sys
 }
 
 type nullnode struct{}
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index ccfbdc4da..d3af92f9e 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -85,6 +85,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
 				name:    ".",
 				mode:    os.ModeDir | 0755,
 				modTime: modTime,
+				sys:     c,
 			},
 			inodes: make(map[string]inode),
 		},
diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go
index 66a126a39..07cf76003 100644
--- a/sdk/go/arvados/fs_deferred.go
+++ b/sdk/go/arvados/fs_deferred.go
@@ -24,6 +24,7 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode {
 			name:    coll.Name,
 			modTime: modTime,
 			mode:    0755 | os.ModeDir,
+			sys:     &coll,
 		},
 	}
 	return &deferrednode{wrapped: placeholder, create: func() inode {
diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go
index 380fb9c6d..4db87a591 100644
--- a/sdk/go/arvados/fs_project.go
+++ b/sdk/go/arvados/fs_project.go
@@ -38,6 +38,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
 				{"uuid", "is_a", []string{"arvados#collection", "arvados#group"}},
 				{"groups.group_class", "=", "project"},
 			},
+			Select: []string{"uuid", "name", "modified_at", "properties"},
 		})
 		if err != nil {
 			return nil, err
@@ -63,7 +64,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in
 	if strings.Contains(coll.UUID, "-j7d0g-") {
 		// Group item was loaded into a Collection var -- but
 		// we only need the Name and UUID anyway, so it's OK.
-		return fs.newProjectNode(parent, coll.Name, coll.UUID), nil
+		return fs.newProjectNode(parent, coll.Name, coll.UUID, coll.Properties), nil
 	} else if strings.Contains(coll.UUID, "-4zz18-") {
 		return deferredCollectionFS(fs, parent, coll), nil
 	} else {
@@ -98,6 +99,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
 			Count:   "none",
 			Filters: filters,
 			Order:   "uuid",
+			Select:  []string{"uuid", "name", "modified_at", "properties"},
 		}
 
 		for {
@@ -121,7 +123,7 @@ func (fs *customFileSystem) projectsLoadAll(parent inode, uuid string) ([]inode,
 					continue
 				}
 				if strings.Contains(i.UUID, "-j7d0g-") {
-					inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID))
+					inodes = append(inodes, fs.newProjectNode(parent, i.Name, i.UUID, i.Properties))
 				} else if strings.Contains(i.UUID, "-4zz18-") {
 					inodes = append(inodes, deferredCollectionFS(fs, parent, i))
 				} else {
diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go
index 3892be1e9..0a561f667 100644
--- a/sdk/go/arvados/fs_site.go
+++ b/sdk/go/arvados/fs_site.go
@@ -77,7 +77,7 @@ func (fs *customFileSystem) MountProject(mount, uuid string) {
 	fs.root.treenode.Lock()
 	defer fs.root.treenode.Unlock()
 	fs.root.treenode.Child(mount, func(inode) (inode, error) {
-		return fs.newProjectNode(fs.root, mount, uuid), nil
+		return fs.newProjectNode(fs.root, mount, uuid, nil), nil
 	})
 }
 
@@ -140,7 +140,7 @@ func (fs *customFileSystem) mountByID(parent inode, id string) inode {
 	if strings.Contains(id, "-4zz18-") || pdhRegexp.MatchString(id) {
 		return fs.mountCollection(parent, id)
 	} else if strings.Contains(id, "-j7d0g-") {
-		return fs.newProjectNode(fs.root, id, id)
+		return fs.newProjectNode(fs.root, id, id, nil)
 	} else {
 		return nil
 	}
@@ -161,7 +161,7 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode {
 	return cfs
 }
 
-func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode {
+func (fs *customFileSystem) newProjectNode(root inode, name, uuid string, properties map[string]interface{}) inode {
 	return &lookupnode{
 		stale:   fs.Stale,
 		loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) },
@@ -174,6 +174,11 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode
 				name:    name,
 				modTime: time.Now(),
 				mode:    0755 | os.ModeDir,
+				sys: &Group{
+					GroupClass: "project",
+					UUID:       uuid,
+					Properties: properties,
+				},
 			},
 		},
 	}
diff --git a/sdk/go/arvados/fs_users.go b/sdk/go/arvados/fs_users.go
index 00f703696..ae47414b7 100644
--- a/sdk/go/arvados/fs_users.go
+++ b/sdk/go/arvados/fs_users.go
@@ -20,7 +20,7 @@ func (fs *customFileSystem) usersLoadOne(parent inode, name string) (inode, erro
 		return nil, os.ErrNotExist
 	}
 	user := resp.Items[0]
-	return fs.newProjectNode(parent, user.Username, user.UUID), nil
+	return fs.newProjectNode(parent, user.Username, user.UUID, nil), nil
 }
 
 func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
@@ -41,7 +41,7 @@ func (fs *customFileSystem) usersLoadAll(parent inode) ([]inode, error) {
 			if user.Username == "" {
 				continue
 			}
-			inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID))
+			inodes = append(inodes, fs.newProjectNode(parent, user.Username, user.UUID, nil))
 		}
 		params.Filters = []Filter{{"uuid", ">", resp.Items[len(resp.Items)-1].UUID}}
 	}
diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
index 59ab3cd43..4117dafbc 100644
--- a/services/keep-web/s3.go
+++ b/services/keep-web/s3.go
@@ -8,12 +8,14 @@ import (
 	"crypto/hmac"
 	"crypto/sha256"
 	"encoding/base64"
+	"encoding/json"
 	"encoding/xml"
 	"errors"
 	"fmt"
 	"hash"
 	"io"
 	"net/http"
+	"net/textproto"
 	"net/url"
 	"os"
 	"path/filepath"
@@ -385,6 +387,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 		if r.Method == "HEAD" && !objectNameGiven {
 			// HeadBucket
 			if err == nil && fi.IsDir() {
+				setFileInfoHeaders(w.Header(), fs, fspath)
 				w.WriteHeader(http.StatusOK)
 			} else if os.IsNotExist(err) {
 				s3ErrorResponse(w, NoSuchBucket, "The specified bucket does not exist.", r.URL.Path, http.StatusNotFound)
@@ -394,6 +397,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 			return true
 		}
 		if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Cluster.Collections.S3FolderObjects {
+			setFileInfoHeaders(w.Header(), fs, fspath)
 			w.Header().Set("Content-Type", "application/x-directory")
 			w.WriteHeader(http.StatusOK)
 			return true
@@ -415,6 +419,7 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 		// shallow copy r, and change URL path
 		r := *r
 		r.URL.Path = fspath
+		setFileInfoHeaders(w.Header(), fs, fspath)
 		http.FileServer(fs).ServeHTTP(w, &r)
 		return true
 	case r.Method == http.MethodPut:
@@ -586,6 +591,48 @@ func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool {
 	}
 }
 
+func setFileInfoHeaders(header http.Header, fs arvados.CustomFileSystem, path string) { // sets x-amz-meta-* headers from the properties of the nearest collection or group found at or above path
+	path = strings.TrimSuffix(path, "/")
+	var props map[string]interface{}
+	for { // walk up from path until an entry exposes its source record via Sys()
+		fi, err := fs.Stat(path)
+		if err != nil {
+			return // best effort: no metadata headers are set if the path cannot be examined
+		}
+		switch src := fi.Sys().(type) {
+		case *arvados.Collection:
+			props = src.Properties
+		case *arvados.Group:
+			props = src.Properties
+		default:
+			// Sys() returned neither a collection nor a group for this entry; try its parent directory.
+			cut := strings.LastIndexByte(path, '/')
+			if cut < 0 {
+				return // no parent left to try
+			}
+			path = path[:cut]
+			continue
+		}
+		break
+	}
+	for k, v := range props {
+		if !validMIMEHeaderKey(k) {
+			continue // skip properties whose names cannot be used as part of a header name
+		}
+		k = "x-amz-meta-" + k
+		if s, ok := v.(string); ok {
+			header.Set(k, s) // string values are sent verbatim
+		} else if j, err := json.Marshal(v); err == nil {
+			header.Set(k, string(j)) // other values are sent JSON-encoded; values that fail to encode are omitted
+		}
+	}
+}
+
+func validMIMEHeaderKey(k string) bool { // reports whether k is usable as (part of) a MIME header name
+	check := "z-" + k // lowercase prefix guarantees canonicalization changes any valid key ("z-" becomes "Z-")
+	return check != textproto.CanonicalMIMEHeaderKey(check) // CanonicalMIMEHeaderKey returns keys containing invalid bytes unchanged
+}
+
 // Call fn on the given path (directory) and its contents, in
 // lexicographic order.
 //
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
index 261ebb574..a2e61e9b7 100644
--- a/services/keep-web/s3_test.go
+++ b/services/keep-web/s3_test.go
@@ -39,12 +39,13 @@ type s3stage struct {
 	kc         *keepclient.KeepClient
 	proj       arvados.Group
 	projbucket *s3.Bucket
+	subproj    arvados.Group
 	coll       arvados.Collection
 	collbucket *s3.Bucket
 }
 
 func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
-	var proj arvados.Group
+	var proj, subproj arvados.Group
 	var coll arvados.Collection
 	arv := arvados.NewClientFromEnv()
 	arv.AuthToken = arvadostest.ActiveToken
@@ -56,10 +57,27 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
 		"ensure_unique_name": true,
 	})
 	c.Assert(err, check.IsNil)
+	err = arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+		"group": map[string]interface{}{
+			"owner_uuid":  proj.UUID,
+			"group_class": "project",
+			"name":        "keep-web s3 test subproject",
+			"properties": map[string]interface{}{
+				"subproject_properties_key": "subproject properties value",
+				"invalid header key":        "this value will not be returned because key contains spaces",
+			},
+		},
+	})
+	c.Assert(err, check.IsNil)
 	err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
 		"owner_uuid":    proj.UUID,
 		"name":          "keep-web s3 test collection",
 		"manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+		"properties": map[string]interface{}{
+			"string": "string value",
+			"array":  []string{"element1", "element2"},
+			"object": map[string]interface{}{"key": map[string]interface{}{"key2": "value"}},
+		},
 	}})
 	c.Assert(err, check.IsNil)
 	ac, err := arvadosclient.New(arv)
@@ -95,7 +113,8 @@ func (s *IntegrationSuite) s3setup(c *check.C) s3stage {
 			S3:   client,
 			Name: proj.UUID,
 		},
-		coll: coll,
+		subproj: subproj,
+		coll:    coll,
 		collbucket: &s3.Bucket{
 			S3:   client,
 			Name: coll.UUID,
@@ -215,6 +234,46 @@ func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix
 	c.Check(exists, check.Equals, true)
 }
 
+func (s *IntegrationSuite) checkMetaEquals(c *check.C, resp *http.Response, expect map[string]string) { // checks that resp's X-Amz-Meta-* headers, keyed by the name after the prefix, equal expect
+	got := map[string]string{}
+	for hk, hv := range resp.Header {
+		if k := strings.TrimPrefix(hk, "X-Amz-Meta-"); k != hk && len(hv) == 1 { // k != hk means the prefix was present; only single-valued headers are collected
+			got[k] = hv[0]
+		}
+	}
+	c.Check(got, check.DeepEquals, expect)
+}
+
+func (s *IntegrationSuite) TestS3PropertiesAsMetadata(c *check.C) { // verifies that collection and subproject properties are returned as X-Amz-Meta-* response headers
+	stage := s.s3setup(c)
+	defer stage.teardown(c)
+
+	expectCollectionTags := map[string]string{ // keys are in the capitalized form seen in resp.Header; non-string values are expected JSON-encoded
+		"String": "string value",
+		"Array":  `["element1","element2"]`,
+		"Object": `{"key":{"key2":"value"}}`,
+	}
+	expectSubprojectTags := map[string]string{ // the "invalid header key" property created in s3setup must not appear
+		"Subproject_properties_key": "subproject properties value",
+	}
+
+	resp, err := stage.collbucket.Head("sailboat.txt", nil) // file addressed through the collection bucket
+	c.Assert(err, check.IsNil)
+	s.checkMetaEquals(c, resp, expectCollectionTags)
+
+	resp, err = stage.projbucket.Head("keep-web s3 test collection/", nil) // collection directory addressed through the project bucket
+	c.Assert(err, check.IsNil)
+	s.checkMetaEquals(c, resp, expectCollectionTags)
+
+	resp, err = stage.projbucket.Head("keep-web s3 test collection/sailboat.txt", nil) // file inside the collection, addressed through the project bucket
+	c.Assert(err, check.IsNil)
+	s.checkMetaEquals(c, resp, expectCollectionTags)
+
+	resp, err = stage.projbucket.Head("keep-web s3 test subproject/", nil) // subproject directory addressed through the project bucket
+	c.Assert(err, check.IsNil)
+	s.checkMetaEquals(c, resp, expectSubprojectTags)
+}
+
 func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) {
 	stage := s.s3setup(c)
 	defer stage.teardown(c)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list