[ARVADOS] created: 2.1.0-1275-g025e6f91c

Git user git at public.arvados.org
Thu Sep 2 15:24:55 UTC 2021


        at  025e6f91c13bdeb78585ce2c7a6d70c532eb8d7b (commit)


commit 025e6f91c13bdeb78585ce2c7a6d70c532eb8d7b
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Thu Sep 2 11:20:12 2021 -0400

    18078: Reacquire llfuse lock before populating collection directory
    
    Updates the test for generating "conflict" files.
    
    I don't have a test case for the reported bug, however this is the
    only place I could find where _entries was being modified without the
    lock held.  By reacquiring the lock, it should no longer be possible
    to read _entries at the same time as it is being updated.
    
    Also clean up a few "from future" bits that are no longer needed
    because we're python 3 only.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index 2b963d9a6..d5a018ae8 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -2,11 +2,6 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-from __future__ import absolute_import
-from __future__ import division
-from future.utils import viewitems
-from future.utils import itervalues
-from builtins import dict
 import apiclient
 import arvados
 import errno
@@ -196,7 +191,7 @@ class Directory(FreshBase):
     def in_use(self):
         if super(Directory, self).in_use():
             return True
-        for v in itervalues(self._entries):
+        for v in self._entries.values():
             if v.in_use():
                 return True
         return False
@@ -204,7 +199,7 @@ class Directory(FreshBase):
     def has_ref(self, only_children):
         if super(Directory, self).has_ref(only_children):
             return True
-        for v in itervalues(self._entries):
+        for v in self._entries.values():
             if v.has_ref(False):
                 return True
         return False
@@ -226,7 +221,7 @@ class Directory(FreshBase):
         # Find self on the parent in order to invalidate this path.
         # Calling the public items() method might trigger a refresh,
         # which we definitely don't want, so read the internal dict directly.
-        for k,v in viewitems(parent._entries):
+        for k,v in parent._entries.items():
             if v is self:
                 self.inodes.invalidate_entry(parent, k)
                 break
@@ -347,9 +342,10 @@ class CollectionDirectoryBase(Directory):
 
     def populate(self, mtime):
         self._mtime = mtime
-        self.collection.subscribe(self.on_event)
-        for entry, item in viewitems(self.collection):
-            self.new_entry(entry, item, self.mtime())
+        with self.collection.lock:
+            self.collection.subscribe(self.on_event)
+            for entry, item in self.collection.items():
+                self.new_entry(entry, item, self.mtime())
 
     def writable(self):
         return self.collection.writable()
@@ -496,6 +492,7 @@ class CollectionDirectory(CollectionDirectoryBase):
                         return
 
                     _logger.debug("Updating collection %s inode %s to record version %s", self.collection_locator, self.inode, to_record_version)
+                    new_collection_record = None
                     if self.collection is not None:
                         if self.collection.known_past_version(to_record_version):
                             _logger.debug("%s already processed %s", self.collection_locator, to_record_version)
@@ -522,12 +519,13 @@ class CollectionDirectory(CollectionDirectoryBase):
                         if 'storage_classes_desired' not in new_collection_record:
                             new_collection_record['storage_classes_desired'] = coll_reader.storage_classes_desired()
 
-                        if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"):
-                            self.new_collection(new_collection_record, coll_reader)
-
-                        self._manifest_size = len(coll_reader.manifest_text())
-                        _logger.debug("%s manifest_size %i", self, self._manifest_size)
                 # end with llfuse.lock_released, re-acquire lock
+                if (new_collection_record is not None and
+                    (self.collection_record is None or
+                     self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"))):
+                    self.new_collection(new_collection_record, coll_reader)
+                    self._manifest_size = len(coll_reader.manifest_text())
+                    _logger.debug("%s manifest_size %i", self, self._manifest_size)
 
                 self.fresh()
                 return True
@@ -1230,7 +1228,7 @@ class SharedDirectory(Directory):
 
             # end with llfuse.lock_released, re-acquire lock
 
-            self.merge(viewitems(contents),
+            self.merge(contents.items(),
                        lambda i: i[0],
                        lambda a, i: a.uuid() == i[1]['uuid'],
                        lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes))
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index 82e5c441e..e9343e783 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -15,6 +15,7 @@ import os
 import subprocess
 import time
 import unittest
+import tempfile
 
 import arvados
 import arvados_fuse as fuse
@@ -788,10 +789,15 @@ class FuseDeleteProjectEventTest(MountTestBase):
             attempt(self.assertEqual, [], llfuse.listdir(os.path.join(self.mounttmp, "aproject")))
 
 
-def fuseFileConflictTestHelper(mounttmp):
+def fuseFileConflictTestHelper(mounttmp, uuid, keeptmp):
     class Test(unittest.TestCase):
         def runTest(self):
+            os.environ['KEEP_LOCAL_STORE'] = keeptmp
+
             with open(os.path.join(mounttmp, "file1.txt"), "w") as f:
+                with arvados.collection.Collection(uuid) as collection2:
+                    with collection2.open("file1.txt", "w") as f2:
+                        f2.write("foo")
                 f.write("bar")
 
             d1 = sorted(llfuse.listdir(os.path.join(mounttmp)))
@@ -820,12 +826,8 @@ class FuseFileConflictTest(MountTestBase):
         d1 = llfuse.listdir(os.path.join(self.mounttmp))
         self.assertEqual([], sorted(d1))
 
-        with arvados.collection.Collection(collection.manifest_locator(), api_client=self.api) as collection2:
-            with collection2.open("file1.txt", "w") as f:
-                f.write("foo")
-
         # See note in MountTestBase.setUp
-        self.pool.apply(fuseFileConflictTestHelper, (self.mounttmp,))
+        self.pool.apply(fuseFileConflictTestHelper, (self.mounttmp, collection.manifest_locator(), self.keeptmp))
 
 
 def fuseUnlinkOpenFileTest(mounttmp):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list