[arvados] created: 2.5.0-3-g42bb84a00
git repository hosting
git at public.arvados.org
Fri Feb 10 15:31:13 UTC 2023
at 42bb84a001024e466687fb579992c6cfc82c979a (commit)
commit 42bb84a001024e466687fb579992c6cfc82c979a
Author: Tom Clegg <tom at curii.com>
Date: Fri Feb 10 10:11:09 2023 -0500
20083: Don't copy remote blocks when computing local PDH.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 8e0537730..84ff69d6b 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -103,7 +103,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
return nil, err
}
- txt, err := root.marshalManifest(context.Background(), ".")
+ txt, err := root.marshalManifest(context.Background(), ".", false)
if err != nil {
return nil, err
}
@@ -495,7 +495,7 @@ func (fs *collectionFileSystem) MemorySize() int64 {
func (fs *collectionFileSystem) MarshalManifest(prefix string) (string, error) {
fs.fileSystem.root.Lock()
defer fs.fileSystem.root.Unlock()
- return fs.fileSystem.root.(*dirnode).marshalManifest(context.TODO(), prefix)
+ return fs.fileSystem.root.(*dirnode).marshalManifest(context.TODO(), prefix, true)
}
func (fs *collectionFileSystem) Size() int64 {
@@ -1229,7 +1229,7 @@ func (dn *dirnode) sortedNames() []string {
}
// caller must have write lock.
-func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string, error) {
+func (dn *dirnode) marshalManifest(ctx context.Context, prefix string, flush bool) (string, error) {
cg := newContextGroup(ctx)
defer cg.Cancel()
@@ -1276,7 +1276,7 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
for i, name := range dirnames {
i, name := i, name
cg.Go(func() error {
- txt, err := dn.inodes[name].(*dirnode).marshalManifest(cg.Context(), prefix+"/"+name)
+ txt, err := dn.inodes[name].(*dirnode).marshalManifest(cg.Context(), prefix+"/"+name, flush)
subdirs[i] = txt
return err
})
@@ -1292,7 +1292,10 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
var fileparts []filepart
var blocks []string
- if err := dn.flush(cg.Context(), filenames, flushOpts{sync: true, shortBlocks: true}); err != nil {
+ if !flush {
+ // skip flush -- will fail below if anything
+ // needed flushing
+ } else if err := dn.flush(cg.Context(), filenames, flushOpts{sync: true, shortBlocks: true}); err != nil {
return err
}
for _, name := range filenames {
@@ -1323,10 +1326,12 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
}
streamLen += int64(seg.size)
default:
- // This can't happen: we
- // haven't unlocked since
+ // We haven't unlocked since
// calling flush(sync=true).
- panic(fmt.Sprintf("can't marshal segment type %T", seg))
+ // Evidently the caller passed
+ // flush==false but there were
+ // local changes.
+ return fmt.Errorf("can't marshal segment type %T", seg)
}
}
}
commit 56f1b8c97ac3fcca86c0ff556926862d11cf84d0
Author: Tom Clegg <tom at curii.com>
Date: Fri Feb 10 02:34:34 2023 -0500
20083: Track both loadedPDH and savedPDH for checking local changes.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 354658a25..8e0537730 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -44,9 +44,17 @@ type CollectionFileSystem interface {
type collectionFileSystem struct {
fileSystem
uuid string
- savedPDH atomic.Value
replicas int
storageClasses []string
+
+ // PDH returned by the server as of last sync/load.
+ loadedPDH atomic.Value
+ // PDH of the locally generated manifest as of last
+ // sync/load. This can differ from loadedPDH after loading a
+ // version that was generated with different code and sorts
+ // filenames differently than we do, for example.
+ savedPDH atomic.Value
+
// guessSignatureTTL tracks a lower bound for the server's
// configured BlobSigningTTL. The guess is initially zero, and
// increases when we come across a signature with an expiry
@@ -74,7 +82,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
thr: newThrottle(concurrentWriters),
},
}
- fs.savedPDH.Store(c.PortableDataHash)
+ fs.loadedPDH.Store(c.PortableDataHash)
if r := c.ReplicationDesired; r != nil {
fs.replicas = *r
}
@@ -94,6 +102,13 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
if err := root.loadManifest(c.ManifestText); err != nil {
return nil, err
}
+
+ txt, err := root.marshalManifest(context.Background(), ".")
+ if err != nil {
+ return nil, err
+ }
+ fs.savedPDH.Store(PortableDataHash(txt))
+
backdateTree(root, modTime)
fs.root = root
return fs, nil
@@ -296,7 +311,7 @@ func (fs *collectionFileSystem) Truncate(int64) error {
// Return value is true if new content was loaded from upstream and
// any unsaved local changes have been discarded.
func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
- if fs.uuid == "" && fs.savedPDH.Load() == "" {
+ if fs.uuid == "" && fs.loadedPDH.Load() == "" {
return false, nil
}
@@ -316,6 +331,7 @@ func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
return false, nil
}
+ loadedPDH, _ := fs.loadedPDH.Load().(string)
getparams := map[string]interface{}{"select": []string{"portable_data_hash", "manifest_text"}}
if fs.uuid != "" {
var coll Collection
@@ -323,7 +339,7 @@ func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
if err != nil {
return false, err
}
- if coll.PortableDataHash != fs.savedPDH.Load().(string) {
+ if coll.PortableDataHash != loadedPDH {
// collection has changed upstream since we
// last loaded or saved. Refresh local data,
// losing any unsaved local changes.
@@ -339,15 +355,16 @@ func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
if err != nil {
return false, err
}
- fs.savedPDH.Store(coll.PortableDataHash)
+ fs.loadedPDH.Store(coll.PortableDataHash)
+ fs.savedPDH.Store(newfs.(*collectionFileSystem).savedPDH.Load())
return true, nil
}
fs.updateSignatures(coll.ManifestText)
return false, nil
}
- if pdh := fs.savedPDH.Load().(string); pdh != "" {
+ if loadedPDH != "" {
var coll Collection
- err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+pdh, nil, getparams)
+ err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+loadedPDH, nil, getparams)
if err != nil {
return false, err
}
@@ -368,8 +385,9 @@ func (fs *collectionFileSystem) refreshSignature(locator string) string {
go fs.checkChangesOnServer(false)
return locator
}
+ loadedPDH, _ := fs.loadedPDH.Load().(string)
var manifests string
- for _, id := range []string{fs.uuid, fs.savedPDH.Load().(string)} {
+ for _, id := range []string{fs.uuid, loadedPDH} {
if id == "" {
continue
}
@@ -405,7 +423,8 @@ func (fs *collectionFileSystem) Sync() error {
if err != nil {
return fmt.Errorf("sync failed: %s", err)
}
- if PortableDataHash(txt) == fs.savedPDH.Load() {
+ savingPDH := PortableDataHash(txt)
+ if savingPDH == fs.savedPDH.Load() {
// No local changes since last save or initial load.
return nil
}
@@ -432,7 +451,8 @@ func (fs *collectionFileSystem) Sync() error {
return fmt.Errorf("sync failed: update %s: %w", fs.uuid, err)
}
fs.updateSignatures(coll.ManifestText)
- fs.savedPDH.Store(coll.PortableDataHash)
+ fs.loadedPDH.Store(coll.PortableDataHash)
+ fs.savedPDH.Store(savingPDH)
return nil
}
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index 8b84ddf3e..e91853c16 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -1661,7 +1661,7 @@ func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) {
runtime.ReadMemStats(&memstats)
c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys)
- f, err := coll.FileSystem(nil, nil)
+ f, err := coll.FileSystem(NewClientFromEnv(), &keepClientStub{})
c.Check(err, check.IsNil)
c.Logf("%s loaded", time.Now())
c.Check(f.Size(), check.Equals, int64(42*dirCount*fileCount))
commit 6a4bc6bb4456910ddd16f3e0480c8562ee953e58
Author: Tom Clegg <tom at curii.com>
Date: Thu Feb 9 11:57:07 2023 -0500
20083: Test false positive in "save local changes" logic.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index 73689e4ea..8b84ddf3e 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -124,6 +124,32 @@ func (s *CollectionFSSuite) SetUpTest(c *check.C) {
c.Assert(err, check.IsNil)
}
+func (s *CollectionFSSuite) TestSyncNonCanonicalManifest(c *check.C) {
+ var coll Collection
+ err := s.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+fixtureFooAndBarFilesInDirUUID, nil, nil)
+ c.Assert(err, check.IsNil)
+ mtxt := strings.Replace(coll.ManifestText, "3:3:bar 0:3:foo", "0:3:foo 3:3:bar", -1)
+ c.Assert(mtxt, check.Not(check.Equals), coll.ManifestText)
+ err = s.client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
+ "collection": map[string]interface{}{
+ "manifest_text": mtxt}})
+ c.Assert(err, check.IsNil)
+ c.Assert(mtxt, check.Equals, coll.ManifestText)
+
+ fs, err := coll.FileSystem(s.client, s.kc)
+ c.Assert(err, check.IsNil)
+ err = fs.Sync()
+ c.Check(err, check.IsNil)
+
+ // fs had no local changes, so Sync should not have saved
+ // anything back to the API/database. (If it did, we would see
+ // the manifest rewritten in canonical order.)
+ var saved Collection
+ err = s.client.RequestAndDecode(&saved, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
+ c.Assert(err, check.IsNil)
+ c.Check(saved.ManifestText, check.Equals, mtxt)
+}
+
func (s *CollectionFSSuite) TestHttpFileSystemInterface(c *check.C) {
_, ok := s.fs.(http.FileSystem)
c.Check(ok, check.Equals, true)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list