[arvados] created: 2.5.0-3-g42bb84a00

git repository hosting git at public.arvados.org
Fri Feb 10 15:31:13 UTC 2023


        at  42bb84a001024e466687fb579992c6cfc82c979a (commit)


commit 42bb84a001024e466687fb579992c6cfc82c979a
Author: Tom Clegg <tom at curii.com>
Date:   Fri Feb 10 10:11:09 2023 -0500

    20083: Don't copy remote blocks when computing local PDH.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 8e0537730..84ff69d6b 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -103,7 +103,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
 		return nil, err
 	}
 
-	txt, err := root.marshalManifest(context.Background(), ".")
+	txt, err := root.marshalManifest(context.Background(), ".", false)
 	if err != nil {
 		return nil, err
 	}
@@ -495,7 +495,7 @@ func (fs *collectionFileSystem) MemorySize() int64 {
 func (fs *collectionFileSystem) MarshalManifest(prefix string) (string, error) {
 	fs.fileSystem.root.Lock()
 	defer fs.fileSystem.root.Unlock()
-	return fs.fileSystem.root.(*dirnode).marshalManifest(context.TODO(), prefix)
+	return fs.fileSystem.root.(*dirnode).marshalManifest(context.TODO(), prefix, true)
 }
 
 func (fs *collectionFileSystem) Size() int64 {
@@ -1229,7 +1229,7 @@ func (dn *dirnode) sortedNames() []string {
 }
 
 // caller must have write lock.
-func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string, error) {
+func (dn *dirnode) marshalManifest(ctx context.Context, prefix string, flush bool) (string, error) {
 	cg := newContextGroup(ctx)
 	defer cg.Cancel()
 
@@ -1276,7 +1276,7 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
 	for i, name := range dirnames {
 		i, name := i, name
 		cg.Go(func() error {
-			txt, err := dn.inodes[name].(*dirnode).marshalManifest(cg.Context(), prefix+"/"+name)
+			txt, err := dn.inodes[name].(*dirnode).marshalManifest(cg.Context(), prefix+"/"+name, flush)
 			subdirs[i] = txt
 			return err
 		})
@@ -1292,7 +1292,10 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
 
 		var fileparts []filepart
 		var blocks []string
-		if err := dn.flush(cg.Context(), filenames, flushOpts{sync: true, shortBlocks: true}); err != nil {
+		if !flush {
+			// skip flush -- will fail below if anything
+			// needed flushing
+		} else if err := dn.flush(cg.Context(), filenames, flushOpts{sync: true, shortBlocks: true}); err != nil {
 			return err
 		}
 		for _, name := range filenames {
@@ -1323,10 +1326,12 @@ func (dn *dirnode) marshalManifest(ctx context.Context, prefix string) (string,
 					}
 					streamLen += int64(seg.size)
 				default:
-					// This can't happen: we
-					// haven't unlocked since
+					// We haven't unlocked since
 					// calling flush(sync=true).
-					panic(fmt.Sprintf("can't marshal segment type %T", seg))
+					// Evidently the caller passed
+					// flush==false but there were
+					// local changes.
+					return fmt.Errorf("can't marshal segment type %T", seg)
 				}
 			}
 		}

commit 56f1b8c97ac3fcca86c0ff556926862d11cf84d0
Author: Tom Clegg <tom at curii.com>
Date:   Fri Feb 10 02:34:34 2023 -0500

    20083: Track both loadedPDH and savedPDH for checking local changes.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go
index 354658a25..8e0537730 100644
--- a/sdk/go/arvados/fs_collection.go
+++ b/sdk/go/arvados/fs_collection.go
@@ -44,9 +44,17 @@ type CollectionFileSystem interface {
 type collectionFileSystem struct {
 	fileSystem
 	uuid           string
-	savedPDH       atomic.Value
 	replicas       int
 	storageClasses []string
+
+	// PDH returned by the server as of last sync/load.
+	loadedPDH atomic.Value
+	// PDH of the locally generated manifest as of last
+	// sync/load. This can differ from loadedPDH after loading a
+	// version that was generated with different code and sorts
+	// filenames differently than we do, for example.
+	savedPDH atomic.Value
+
 	// guessSignatureTTL tracks a lower bound for the server's
 	// configured BlobSigningTTL. The guess is initially zero, and
 	// increases when we come across a signature with an expiry
@@ -74,7 +82,7 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
 			thr:       newThrottle(concurrentWriters),
 		},
 	}
-	fs.savedPDH.Store(c.PortableDataHash)
+	fs.loadedPDH.Store(c.PortableDataHash)
 	if r := c.ReplicationDesired; r != nil {
 		fs.replicas = *r
 	}
@@ -94,6 +102,13 @@ func (c *Collection) FileSystem(client apiClient, kc keepClient) (CollectionFile
 	if err := root.loadManifest(c.ManifestText); err != nil {
 		return nil, err
 	}
+
+	txt, err := root.marshalManifest(context.Background(), ".")
+	if err != nil {
+		return nil, err
+	}
+	fs.savedPDH.Store(PortableDataHash(txt))
+
 	backdateTree(root, modTime)
 	fs.root = root
 	return fs, nil
@@ -296,7 +311,7 @@ func (fs *collectionFileSystem) Truncate(int64) error {
 // Return value is true if new content was loaded from upstream and
 // any unsaved local changes have been discarded.
 func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
-	if fs.uuid == "" && fs.savedPDH.Load() == "" {
+	if fs.uuid == "" && fs.loadedPDH.Load() == "" {
 		return false, nil
 	}
 
@@ -316,6 +331,7 @@ func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
 		return false, nil
 	}
 
+	loadedPDH, _ := fs.loadedPDH.Load().(string)
 	getparams := map[string]interface{}{"select": []string{"portable_data_hash", "manifest_text"}}
 	if fs.uuid != "" {
 		var coll Collection
@@ -323,7 +339,7 @@ func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
 		if err != nil {
 			return false, err
 		}
-		if coll.PortableDataHash != fs.savedPDH.Load().(string) {
+		if coll.PortableDataHash != loadedPDH {
 			// collection has changed upstream since we
 			// last loaded or saved. Refresh local data,
 			// losing any unsaved local changes.
@@ -339,15 +355,16 @@ func (fs *collectionFileSystem) checkChangesOnServer(force bool) (bool, error) {
 			if err != nil {
 				return false, err
 			}
-			fs.savedPDH.Store(coll.PortableDataHash)
+			fs.loadedPDH.Store(coll.PortableDataHash)
+			fs.savedPDH.Store(newfs.(*collectionFileSystem).savedPDH.Load())
 			return true, nil
 		}
 		fs.updateSignatures(coll.ManifestText)
 		return false, nil
 	}
-	if pdh := fs.savedPDH.Load().(string); pdh != "" {
+	if loadedPDH != "" {
 		var coll Collection
-		err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+pdh, nil, getparams)
+		err := fs.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+loadedPDH, nil, getparams)
 		if err != nil {
 			return false, err
 		}
@@ -368,8 +385,9 @@ func (fs *collectionFileSystem) refreshSignature(locator string) string {
 		go fs.checkChangesOnServer(false)
 		return locator
 	}
+	loadedPDH, _ := fs.loadedPDH.Load().(string)
 	var manifests string
-	for _, id := range []string{fs.uuid, fs.savedPDH.Load().(string)} {
+	for _, id := range []string{fs.uuid, loadedPDH} {
 		if id == "" {
 			continue
 		}
@@ -405,7 +423,8 @@ func (fs *collectionFileSystem) Sync() error {
 	if err != nil {
 		return fmt.Errorf("sync failed: %s", err)
 	}
-	if PortableDataHash(txt) == fs.savedPDH.Load() {
+	savingPDH := PortableDataHash(txt)
+	if savingPDH == fs.savedPDH.Load() {
 		// No local changes since last save or initial load.
 		return nil
 	}
@@ -432,7 +451,8 @@ func (fs *collectionFileSystem) Sync() error {
 		return fmt.Errorf("sync failed: update %s: %w", fs.uuid, err)
 	}
 	fs.updateSignatures(coll.ManifestText)
-	fs.savedPDH.Store(coll.PortableDataHash)
+	fs.loadedPDH.Store(coll.PortableDataHash)
+	fs.savedPDH.Store(savingPDH)
 	return nil
 }
 
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index 8b84ddf3e..e91853c16 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -1661,7 +1661,7 @@ func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) {
 	runtime.ReadMemStats(&memstats)
 	c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys)
 
-	f, err := coll.FileSystem(nil, nil)
+	f, err := coll.FileSystem(NewClientFromEnv(), &keepClientStub{})
 	c.Check(err, check.IsNil)
 	c.Logf("%s loaded", time.Now())
 	c.Check(f.Size(), check.Equals, int64(42*dirCount*fileCount))

commit 6a4bc6bb4456910ddd16f3e0480c8562ee953e58
Author: Tom Clegg <tom at curii.com>
Date:   Thu Feb 9 11:57:07 2023 -0500

    20083: Test false positive in "save local changes" logic.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index 73689e4ea..8b84ddf3e 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -124,6 +124,32 @@ func (s *CollectionFSSuite) SetUpTest(c *check.C) {
 	c.Assert(err, check.IsNil)
 }
 
+func (s *CollectionFSSuite) TestSyncNonCanonicalManifest(c *check.C) {
+	var coll Collection
+	err := s.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+fixtureFooAndBarFilesInDirUUID, nil, nil)
+	c.Assert(err, check.IsNil)
+	mtxt := strings.Replace(coll.ManifestText, "3:3:bar 0:3:foo", "0:3:foo 3:3:bar", -1)
+	c.Assert(mtxt, check.Not(check.Equals), coll.ManifestText)
+	err = s.client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{
+		"collection": map[string]interface{}{
+			"manifest_text": mtxt}})
+	c.Assert(err, check.IsNil)
+	c.Assert(mtxt, check.Equals, coll.ManifestText)
+
+	fs, err := coll.FileSystem(s.client, s.kc)
+	c.Assert(err, check.IsNil)
+	err = fs.Sync()
+	c.Check(err, check.IsNil)
+
+	// fs had no local changes, so Sync should not have saved
+	// anything back to the API/database. (If it did, we would see
+	// the manifest rewritten in canonical order.)
+	var saved Collection
+	err = s.client.RequestAndDecode(&saved, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil)
+	c.Assert(err, check.IsNil)
+	c.Check(saved.ManifestText, check.Equals, mtxt)
+}
+
 func (s *CollectionFSSuite) TestHttpFileSystemInterface(c *check.C) {
 	_, ok := s.fs.(http.FileSystem)
 	c.Check(ok, check.Equals, true)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list