[arvados] created: 2.6.0-369-g0c881f2fa
git repository hosting <git at public.arvados.org>
Wed Jul 12 04:20:19 UTC 2023
at 0c881f2fa420fbe98be2431d763a13f249f8edfc (commit)
commit 0c881f2fa420fbe98be2431d763a13f249f8edfc
Author: Tom Clegg <tom at curii.com>
Date: Wed Jul 12 00:19:08 2023 -0400
20726: Fix traversing projects/collections that precede page marker.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
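The change below replaces the old prefix/marker comparisons in s3list's walk callback with a single pruning decision per path, so subtrees that sort entirely before the page marker are skipped instead of traversed. A standalone sketch of that decision follows; the function name and labels are hypothetical (not keep-web code), and it assumes directory paths end in "/" and that startAfter is already the later of the client's marker and start-after values, as the patch computes.

// Sketch of the per-path pruning decision applied in s3list's walk
// callback (illustration only; names are hypothetical).
package main

import (
	"fmt"
	"strings"
)

func decide(path, prefix, startAfter string) string {
	switch {
	case strings.HasPrefix(prefix, path) && prefix != path:
		return "descend" // ancestor of the desired prefix: keep walking
	case path < prefix:
		return "skip" // sorts before the prefix, so its whole subtree is irrelevant
	case path > prefix && !strings.HasPrefix(path, prefix):
		return "done" // past the prefix range: nothing further can match
	case path == startAfter:
		return "descend" // don't emit the marker itself, but walk its children
	case strings.HasPrefix(startAfter, path):
		return "descend" // ancestor of the marker: later entries inside may qualify
	case path < startAfter:
		return "skip" // sorts before the marker: prune (filepath.SkipDir) instead of descending
	default:
		return "emit" // candidate for resp.Contents / commonPrefixes
	}
}

func main() {
	// With prefix "dir0/" and marker "dir0/file14.txt", "dir0/" is still
	// descended, "dir0/file1.txt" is pruned, and "dir1/" ends the walk.
	for _, p := range []string{"dir0/", "dir0/file1.txt", "dir0/file15.txt", "dir1/"} {
		fmt.Printf("%-18s %s\n", p, decide(p, "dir0/", "dir0/file14.txt"))
	}
}
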
diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
index fd8764da8..8615ceaf0 100644
--- a/services/keep-web/s3.go
+++ b/services/keep-web/s3.go
@@ -790,6 +790,13 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
params.marker = r.FormValue("marker")
}
+ // startAfter is params.marker or params.startAfter, whichever
+ // comes last.
+ startAfter := params.startAfter
+ if startAfter < params.marker {
+ startAfter = params.marker
+ }
+
bucketdir := "by_id/" + bucket
// walkpath is the directory (relative to bucketdir) we need
// to walk: the innermost directory that is guaranteed to
@@ -821,6 +828,7 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
nextMarker := ""
commonPrefixes := map[string]bool{}
+ full := false
err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
if path == bucketdir {
return nil
@@ -831,36 +839,29 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
path += "/"
filesize = 0
}
- if len(path) <= len(params.prefix) {
- if path > params.prefix[:len(path)] {
- // with prefix "foobar", walking "fooz" means we're done
- return errDone
- }
- if path < params.prefix[:len(path)] {
- // with prefix "foobar", walking "foobag" is pointless
- return filepath.SkipDir
- }
- if fi.IsDir() && !strings.HasPrefix(params.prefix+"/", path) {
- // with prefix "foo/bar", walking "fo"
- // is pointless (but walking "foo" or
- // "foo/bar" is necessary)
- return filepath.SkipDir
- }
- if len(path) < len(params.prefix) {
- // can't skip anything, and this entry
- // isn't in the results, so just
- // continue descent
- return nil
- }
- } else {
- if path[:len(params.prefix)] > params.prefix {
- // with prefix "foobar", nothing we
- // see after "foozzz" is relevant
- return errDone
- }
- }
- if path <= params.marker || path < params.prefix || path <= params.startAfter {
+ if strings.HasPrefix(params.prefix, path) && params.prefix != path {
+ // Descend into subtree until we reach desired prefix
return nil
+ } else if path < params.prefix {
+ // Not an ancestor or descendant of desired
+ // prefix, therefore none of its descendants
+ // can be either -- skip
+ return filepath.SkipDir
+ } else if path > params.prefix && !strings.HasPrefix(path, params.prefix) {
+ // We must have traversed everything under
+ // desired prefix
+ return errDone
+ } else if path == startAfter {
+ // Skip startAfter itself, just descend into
+ // subtree
+ return nil
+ } else if strings.HasPrefix(startAfter, path) {
+ // Descend into subtree in case it contains
+ // something after startAfter
+ return nil
+ } else if path < startAfter {
+ // Skip ahead until we reach startAfter
+ return filepath.SkipDir
}
if fi.IsDir() && !h.Cluster.Collections.S3FolderObjects {
// Note we don't add anything to
@@ -870,10 +871,6 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
// finding a regular file inside it.
return nil
}
- if len(resp.Contents)+len(commonPrefixes) >= params.maxKeys {
- resp.IsTruncated = true
- return errDone
- }
if params.delimiter != "" {
idx := strings.Index(path[len(params.prefix):], params.delimiter)
if idx >= 0 {
@@ -882,19 +879,32 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
// add "/baz" to commonPrefixes and
// stop descending.
prefix := path[:len(params.prefix)+idx+1]
- if prefix != params.marker {
+ if prefix == startAfter {
+ return nil
+ } else if prefix < startAfter && !strings.HasPrefix(startAfter, prefix) {
+ return nil
+ } else if full {
+ resp.IsTruncated = true
+ return errDone
+ } else {
commonPrefixes[prefix] = true
nextMarker = prefix
+ full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
+ return filepath.SkipDir
}
- return filepath.SkipDir
}
}
+ if full {
+ resp.IsTruncated = true
+ return errDone
+ }
resp.Contents = append(resp.Contents, s3Key{
Key: path,
LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
Size: filesize,
})
nextMarker = path
+ full = len(resp.Contents)+len(commonPrefixes) >= params.maxKeys
return nil
})
if err != nil && err != errDone {
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
index 556f8ba2c..9f8650b8e 100644
--- a/services/keep-web/s3_test.go
+++ b/services/keep-web/s3_test.go
@@ -943,7 +943,8 @@ func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
{"dir0", "", ""},
{"dir0/", "", ""},
{"dir0/f", "", ""},
- {"dir0", "/", "dir0/file14.txt"}, // no commonprefixes
+ {"dir0", "/", "dir0/file14.txt"}, // one commonprefix, "dir0/"
+ {"dir0", "/", "dir0/zzzzfile.txt"}, // no commonprefixes
{"", "", "dir0/file14.txt"}, // middle page, skip walking dir1
{"", "", "dir1/file14.txt"}, // middle page, skip walking dir0
{"", "", "dir1/file498.txt"}, // last page of results
@@ -1017,6 +1018,61 @@ func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
}
}
+func (s *IntegrationSuite) TestS3ListObjectsV2ManySubprojects(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+ projects := 50
+ collectionsPerProject := 2
+ for i := 0; i < projects; i++ {
+ var subproj arvados.Group
+ err := stage.arv.RequestAndDecode(&subproj, "POST", "arvados/v1/groups", nil, map[string]interface{}{
+ "group": map[string]interface{}{
+ "owner_uuid": stage.subproj.UUID,
+ "group_class": "project",
+ "name": fmt.Sprintf("keep-web s3 test subproject %d", i),
+ },
+ })
+ c.Assert(err, check.IsNil)
+ for j := 0; j < collectionsPerProject; j++ {
+ err = stage.arv.RequestAndDecode(nil, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{
+ "owner_uuid": subproj.UUID,
+ "name": fmt.Sprintf("keep-web s3 test collection %d", j),
+ "manifest_text": ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n",
+ }})
+ c.Assert(err, check.IsNil)
+ }
+ }
+ c.Logf("setup complete")
+
+ sess := aws_session.Must(aws_session.NewSession(&aws_aws.Config{
+ Region: aws_aws.String("auto"),
+ Endpoint: aws_aws.String(s.testServer.URL),
+ Credentials: aws_credentials.NewStaticCredentials(url.QueryEscape(arvadostest.ActiveTokenV2), url.QueryEscape(arvadostest.ActiveTokenV2), ""),
+ S3ForcePathStyle: aws_aws.Bool(true),
+ }))
+ client := aws_s3.New(sess)
+ ctx := context.Background()
+ params := aws_s3.ListObjectsV2Input{
+ Bucket: aws_aws.String(stage.proj.UUID),
+ Delimiter: aws_aws.String("/"),
+ Prefix: aws_aws.String("keep-web s3 test subproject/"),
+ MaxKeys: aws_aws.Int64(int64(projects / 2)),
+ }
+ for page := 1; ; page++ {
+ t0 := time.Now()
+ result, err := client.ListObjectsV2WithContext(ctx, &params)
+ if !c.Check(err, check.IsNil) {
+ break
+ }
+ c.Logf("got page %d in %v with len(Contents) == %d, len(CommonPrefixes) == %d", page, time.Since(t0), len(result.Contents), len(result.CommonPrefixes))
+ if !*result.IsTruncated {
+ break
+ }
+ params.ContinuationToken = result.NextContinuationToken
+ *params.MaxKeys = *params.MaxKeys/2 + 1
+ }
+}
+
func (s *IntegrationSuite) TestS3ListObjectsV2(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)
commit 8e57b2534bbf5ece2518c3c24ce79c5bb07adf3a
Author: Tom Clegg <tom at curii.com>
Date: Tue Jul 11 15:25:22 2023 -0400
20726: Fix ListObjects[V2] pages duplicating last item on next page.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
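The duplication came from treating the marker as inclusive: with "path < params.marker" the key equal to the marker was emitted again at the top of the next page. In S3 list semantics the marker/continuation token is exclusive, so the next page must start strictly after it, and a continuation point should only be reported when the listing is truncated. A minimal standalone sketch of that rule (hypothetical helper, not keep-web code):

// Marker-based paging over a sorted key list (illustration only).
package main

import "fmt"

func page(keys []string, marker string, maxKeys int) (out []string, nextMarker string, truncated bool) {
	for _, k := range keys {
		if k <= marker { // "<" here would repeat the marker key on the next page
			continue
		}
		if len(out) >= maxKeys {
			truncated = true
			break
		}
		out = append(out, k)
		nextMarker = k
	}
	if !truncated {
		nextMarker = "" // only report a continuation point when there is more
	}
	return
}

func main() {
	keys := []string{"a", "b", "c", "d", "e"}
	marker := ""
	for {
		p, next, more := page(keys, marker, 2)
		fmt.Println(p, more)
		if !more {
			break
		}
		marker = next
	}
	// Prints [a b] true, [c d] true, [e] false -- no key appears twice.
}
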
diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go
index 38428cdab..fd8764da8 100644
--- a/services/keep-web/s3.go
+++ b/services/keep-web/s3.go
@@ -772,6 +772,9 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
http.Error(w, "invalid continuation token", http.StatusBadRequest)
return
}
+ // marker and start-after perform the same function,
+ // but we keep them separate so we can repeat them
+ // back to the client in the response.
params.marker = string(marker)
params.startAfter = r.FormValue("start-after")
switch r.FormValue("encoding-type") {
@@ -783,6 +786,7 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
return
}
} else {
+ // marker is functionally equivalent to start-after.
params.marker = r.FormValue("marker")
}
@@ -809,6 +813,11 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
ContinuationToken: r.FormValue("continuation-token"),
StartAfter: params.startAfter,
}
+
+ // nextMarker will be the last path we add to either
+ // resp.Contents or commonPrefixes. It will be included in
+ // the response as NextMarker or NextContinuationToken if
+ // needed.
nextMarker := ""
commonPrefixes := map[string]bool{}
@@ -850,7 +859,7 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
return errDone
}
}
- if path < params.marker || path < params.prefix || path <= params.startAfter {
+ if path <= params.marker || path < params.prefix || path <= params.startAfter {
return nil
}
if fi.IsDir() && !h.Cluster.Collections.S3FolderObjects {
@@ -863,9 +872,6 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
}
if len(resp.Contents)+len(commonPrefixes) >= params.maxKeys {
resp.IsTruncated = true
- if params.delimiter != "" || params.v2 {
- nextMarker = path
- }
return errDone
}
if params.delimiter != "" {
@@ -875,7 +881,11 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
// "z", when we hit "foobar/baz", we
// add "/baz" to commonPrefixes and
// stop descending.
- commonPrefixes[path[:len(params.prefix)+idx+1]] = true
+ prefix := path[:len(params.prefix)+idx+1]
+ if prefix != params.marker {
+ commonPrefixes[prefix] = true
+ nextMarker = prefix
+ }
return filepath.SkipDir
}
}
@@ -884,12 +894,16 @@ func (h *handler) s3list(bucket string, w http.ResponseWriter, r *http.Request,
LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z",
Size: filesize,
})
+ nextMarker = path
return nil
})
if err != nil && err != errDone {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
+ if params.delimiter == "" && !params.v2 || !resp.IsTruncated {
+ nextMarker = ""
+ }
if params.delimiter != "" {
resp.CommonPrefixes = make([]commonPrefix, 0, len(commonPrefixes))
for prefix := range commonPrefixes {
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
index bfe8bb862..556f8ba2c 100644
--- a/services/keep-web/s3_test.go
+++ b/services/keep-web/s3_test.go
@@ -851,6 +851,9 @@ func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix stri
break
}
for _, key := range resp.Contents {
+ if _, dup := gotKeys[key.Key]; dup {
+ c.Errorf("got duplicate key %q on page %d", key.Key, pages)
+ }
gotKeys[key.Key] = key
if strings.Contains(key.Key, "sailboat.txt") {
c.Check(key.Size, check.Equals, int64(4))
@@ -971,28 +974,31 @@ func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) {
var expectTruncated bool
for _, key := range allfiles {
full := len(expectKeys)+len(expectPrefixes) >= maxKeys
- if !strings.HasPrefix(key, trial.prefix) || key < trial.marker {
+ if !strings.HasPrefix(key, trial.prefix) || key <= trial.marker {
continue
} else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 {
prefix := key[:len(trial.prefix)+idx+1]
if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix {
// same prefix as previous key
} else if full {
- expectNextMarker = key
expectTruncated = true
} else {
expectPrefixes = append(expectPrefixes, prefix)
+ expectNextMarker = prefix
}
} else if full {
- if trial.delimiter != "" {
- expectNextMarker = key
- }
expectTruncated = true
break
} else {
expectKeys = append(expectKeys, key)
+ if trial.delimiter != "" {
+ expectNextMarker = key
+ }
}
}
+ if !expectTruncated {
+ expectNextMarker = ""
+ }
var gotKeys []string
for _, key := range resp.Contents {
commit 6f28783ebb3c0ba0f74c59879e091bc46a874b28
Author: Tom Clegg <tom at curii.com>
Date: Tue Jul 11 13:38:38 2023 -0400
20726: Update s3ListObjects paging test.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go
index aa91d82ae..bfe8bb862 100644
--- a/services/keep-web/s3_test.go
+++ b/services/keep-web/s3_test.go
@@ -17,6 +17,7 @@ import (
"net/url"
"os"
"os/exec"
+ "sort"
"strings"
"sync"
"time"
@@ -817,8 +818,8 @@ func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
var markers int
for markers, s.handler.Cluster.Collections.S3FolderObjects = range []bool{false, true} {
- dirs := 2
- filesPerDir := 1001
+ dirs := 2000
+ filesPerDir := 2
stage.writeBigDirs(c, dirs, filesPerDir)
// Total # objects is:
// 2 file entries from s3setup (emptyfile and sailboat.txt)
@@ -827,6 +828,7 @@ func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
// +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.)
s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs)
s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs)
+ s.testS3List(c, stage.collbucket, "", 51, markers+2+(filesPerDir+markers)*dirs)
s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers)
}
}
@@ -863,7 +865,16 @@ func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix stri
}
nextMarker = resp.NextMarker
}
- c.Check(len(gotKeys), check.Equals, expectFiles)
+ if !c.Check(len(gotKeys), check.Equals, expectFiles) {
+ var sorted []string
+ for k := range gotKeys {
+ sorted = append(sorted, k)
+ }
+ sort.Strings(sorted)
+ for _, k := range sorted {
+ c.Logf("got %s", k)
+ }
+ }
}
func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) {
-----------------------------------------------------------------------