[arvados] updated: 2.7.0-6116-ge74e038e90

git repository hosting git at public.arvados.org
Wed Mar 13 20:36:18 UTC 2024


Summary of changes:
 services/fuse/arvados_fuse/__init__.py  | 50 ++++++++++++++++++++++-----------
 services/fuse/arvados_fuse/command.py   |  1 +
 services/fuse/tests/integration_test.py |  2 +-
 services/fuse/tests/mount_test_base.py  |  4 +--
 services/fuse/tests/test_cache.py       |  1 -
 services/fuse/tests/test_inodes.py      | 19 +++++++++----
 services/fuse/tests/test_mount.py       |  2 +-
 services/fuse/tests/test_unmount.py     |  4 +--
 8 files changed, 55 insertions(+), 28 deletions(-)

       via  e74e038e90759c03a03e76021d0c5b9ab7a9a8ca (commit)
       via  c85df5f3fddf81dfec1c11d948759a628356ea86 (commit)
      from  506f77094efb8c55ec72adf47187b71c2170176c (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit e74e038e90759c03a03e76021d0c5b9ab7a9a8ca
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Wed Mar 13 15:42:46 2024 -0400

    21541: Tests are passing with our llfuse bug fix
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index daeca01bf1..8e5db6d9de 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -85,10 +85,10 @@ from .fusefile import StringFile, FuseArvadosFile
 _logger = logging.getLogger('arvados.arvados_fuse')
 
 # Uncomment this to enable llfuse debug logging.
-#log_handler = logging.StreamHandler()
-llogger = logging.getLogger('llfuse')
-#llogger.addHandler(log_handler)
-llogger.setLevel(logging.DEBUG)
+# log_handler = logging.StreamHandler()
+# llogger = logging.getLogger('llfuse')
+# llogger.addHandler(log_handler)
+# llogger.setLevel(logging.DEBUG)
 
 class Handle(object):
     """Connects a numeric file handle to a File or Directory object that has
@@ -570,34 +570,25 @@ class Operations(llfuse.Operations):
     @destroy_time.time()
     @catch_exceptions
     def destroy(self):
-        try:
-            _logger.error("arv-mount destroy: start")
+        _logger.debug("arv-mount destroy: start")
 
-            self.begin_shutdown()
-            _logger.error("arv-mount destroy start1.1")
-
-            if self.events:
-                _logger.error("arv-mount destroy start1.2")
-                self.events.close()
-                self.events = None
-
-            _logger.error("arv-mount destroy start2")
-
-            # Different versions of llfuse require and forbid us to
-            # acquire the lock here. See #8345#note-37, #10805#note-9.
-            if LLFUSE_VERSION_0 and llfuse.lock.acquire():
-                # llfuse < 0.42
-                _logger.error("arv-mount destroy start3")
-                self.inodes.clear()
-                llfuse.lock.release()
-            else:
-                # llfuse >= 0.42
-                _logger.error("arv-mount destroy start4")
-                self.inodes.clear()
+        self.begin_shutdown()
 
-            _logger.error("arv-mount destroy: complete")
-        except Exception as e:
-            _logger.exception("Error during destroy")
+        if self.events:
+            self.events.close()
+            self.events = None
+
+        # Different versions of llfuse require and forbid us to
+        # acquire the lock here. See #8345#note-37, #10805#note-9.
+        if LLFUSE_VERSION_0 and llfuse.lock.acquire():
+            # llfuse < 0.42
+            self.inodes.clear()
+            llfuse.lock.release()
+        else:
+            # llfuse >= 0.42
+            self.inodes.clear()
+
+        _logger.debug("arv-mount destroy: complete")
 
 
     def access(self, inode, mode, ctx):
@@ -731,7 +722,6 @@ class Operations(llfuse.Operations):
     @catch_exceptions
     def forget(self, inodes):
         if self._shutdown_started.is_set():
-            _logger.debug("arv-mount forget: shutdown_started.is_set")
             return
         for inode, nlookup in inodes:
             ent = self.inodes[inode]
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 430ae0feba..fa8aafdd4a 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -478,8 +478,5 @@ From here, the following directories are available:
         except:
             llfuse.close(unmount=False)
             raise
-        self.logger.error('calling begin_shutdown')
         self.operations.begin_shutdown()
-        time.sleep(1)
-        self.logger.error('calling llfuse.close')
         llfuse.close()
diff --git a/services/fuse/tests/integration_test.py b/services/fuse/tests/integration_test.py
index 89b39dbc87..e80b6983a1 100644
--- a/services/fuse/tests/integration_test.py
+++ b/services/fuse/tests/integration_test.py
@@ -86,7 +86,7 @@ class IntegrationTest(unittest.TestCase):
                     with arvados_fuse.command.Mount(
                             arvados_fuse.command.ArgumentParser().parse_args(
                                 argv + ['--foreground',
-                                        '--unmount-timeout=2',
+                                        '--unmount-timeout=60',
                                         self.mnt])) as self.mount:
                         return func(self, *args, **kwargs)
                 finally:
diff --git a/services/fuse/tests/mount_test_base.py b/services/fuse/tests/mount_test_base.py
index 8a3522e0cb..e0479d3668 100644
--- a/services/fuse/tests/mount_test_base.py
+++ b/services/fuse/tests/mount_test_base.py
@@ -102,10 +102,10 @@ class MountTestBase(unittest.TestCase):
                 self.operations.events.close(timeout=10)
             subprocess.call(["fusermount", "-u", "-z", self.mounttmp])
             t0 = time.time()
-            self.llfuse_thread.join(timeout=10)
+            self.llfuse_thread.join(timeout=60)
             if self.llfuse_thread.is_alive():
                 logger.warning("MountTestBase.tearDown():"
-                               " llfuse thread still alive 10s after umount"
+                               " llfuse thread still alive 20s after umount"
                                " -- exiting with SIGKILL")
                 os.kill(os.getpid(), signal.SIGKILL)
             waited = time.time() - t0
diff --git a/services/fuse/tests/test_cache.py b/services/fuse/tests/test_cache.py
index 5675cbfedf..8cdec8e6ff 100644
--- a/services/fuse/tests/test_cache.py
+++ b/services/fuse/tests/test_cache.py
@@ -37,16 +37,13 @@ class CacheTest(IntegrationTest):
 
     @staticmethod
     def _test_cache_spill(self, mnt, pdh):
-        logging.error("################################################################## WAH WAH1")
         for i,v in enumerate(pdh):
             j = os.path.join(mnt, "by_id", v, "blurg%i" % i)
             self.assertTrue(os.path.exists(j))
             j = os.path.join(mnt, "by_id", v, "dir%i/blurg" % i)
             self.assertTrue(os.path.exists(j))
-        logging.error("################################################################## WAH WAH2")
         for i,v in enumerate(pdh):
             j = os.path.join(mnt, "by_id", v, "blurg%i" % i)
             self.assertTrue(os.path.exists(j))
             j = os.path.join(mnt, "by_id", v, "dir%i/blurg" % i)
             self.assertTrue(os.path.exists(j))
-        logging.error("################################################################## WAH WAH3")
diff --git a/services/fuse/tests/test_inodes.py b/services/fuse/tests/test_inodes.py
index c19a30195e..48e3b242e7 100644
--- a/services/fuse/tests/test_inodes.py
+++ b/services/fuse/tests/test_inodes.py
@@ -12,6 +12,7 @@ class InodeTests(unittest.TestCase):
     def test_inodes_basic(self):
         cache = arvados_fuse.InodeCache(1000, 4)
         inodes = arvados_fuse.Inodes(cache)
+        next(inodes._counter)
 
         # Check that ent1 gets added to inodes
         ent1 = mock.MagicMock()
@@ -27,6 +28,7 @@ class InodeTests(unittest.TestCase):
     def test_inodes_not_persisted(self):
         cache = arvados_fuse.InodeCache(1000, 4)
         inodes = arvados_fuse.Inodes(cache)
+        next(inodes._counter)
 
         ent1 = mock.MagicMock()
         ent1.in_use.return_value = False
@@ -48,6 +50,7 @@ class InodeTests(unittest.TestCase):
     def test_inode_cleared(self):
         cache = arvados_fuse.InodeCache(1000, 4)
         inodes = arvados_fuse.Inodes(cache)
+        next(inodes._counter)
 
         # Check that ent1 gets added to inodes
         ent1 = mock.MagicMock()
@@ -89,6 +92,7 @@ class InodeTests(unittest.TestCase):
     def test_clear_in_use(self):
         cache = arvados_fuse.InodeCache(1000, 4)
         inodes = arvados_fuse.Inodes(cache)
+        next(inodes._counter)
 
         ent1 = mock.MagicMock()
         ent1.in_use.return_value = True
@@ -111,10 +115,12 @@ class InodeTests(unittest.TestCase):
         ent3.clear.called = False
         self.assertFalse(ent1.clear.called)
         self.assertFalse(ent3.clear.called)
-        cache.touch(ent3)
+        inodes.touch(ent3)
+        inodes.wait_remove_queue_empty()
         self.assertFalse(ent1.clear.called)
         self.assertFalse(ent3.clear.called)
-        self.assertFalse(ent3.kernel_invalidate.called)
+        # kernel invalidate gets called anyway
+        self.assertTrue(ent3.kernel_invalidate.called)
         self.assertEqual(1100, cache.total())
 
         # ent1 still in use, ent3 doesn't have ref,
@@ -122,15 +128,16 @@ class InodeTests(unittest.TestCase):
         ent3.has_ref.return_value = False
         ent1.clear.called = False
         ent3.clear.called = False
-        cache.touch(ent3)
+        inodes.touch(ent3)
         inodes.wait_remove_queue_empty()
         self.assertFalse(ent1.clear.called)
         self.assertTrue(ent3.clear.called)
         self.assertEqual(500, cache.total())
 
     def test_delete(self):
-        cache = arvados_fuse.InodeCache(1000, 4)
+        cache = arvados_fuse.InodeCache(1000, 0)
         inodes = arvados_fuse.Inodes(cache)
+        next(inodes._counter)
 
         ent1 = mock.MagicMock()
         ent1.in_use.return_value = False
@@ -152,5 +159,7 @@ class InodeTests(unittest.TestCase):
             inodes.del_entry(ent1)
         inodes.wait_remove_queue_empty()
         self.assertEqual(0, cache.total())
-        cache.touch(ent3)
+
+        inodes.touch(ent3)
+        inodes.wait_remove_queue_empty()
         self.assertEqual(600, cache.total())
diff --git a/services/fuse/tests/test_unmount.py b/services/fuse/tests/test_unmount.py
index e89571087e..6a19b33454 100644
--- a/services/fuse/tests/test_unmount.py
+++ b/services/fuse/tests/test_unmount.py
@@ -31,11 +31,11 @@ class UnmountTest(IntegrationTest):
              self.mnt])
         subprocess.check_call(
             ['./bin/arv-mount', '--subtype', 'test', '--replace',
-             '--unmount-timeout', '10',
+             '--unmount-timeout', '60',
              self.mnt])
         subprocess.check_call(
             ['./bin/arv-mount', '--subtype', 'test', '--replace',
-             '--unmount-timeout', '10',
+             '--unmount-timeout', '60',
              self.mnt,
              '--exec', 'true'])
         for m in subprocess.check_output(['mount']).splitlines():

commit c85df5f3fddf81dfec1c11d948759a628356ea86
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Tue Mar 12 17:10:58 2024 -0400

    21541: test segfault fix with debug stuff, will clean up next commit
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 42f22a9ada..daeca01bf1 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -77,7 +77,6 @@ import arvados.keep
 from prometheus_client import Summary
 import queue
 
-
 LLFUSE_VERSION_0 = llfuse.__version__.startswith('0')
 
 from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
@@ -86,10 +85,10 @@ from .fusefile import StringFile, FuseArvadosFile
 _logger = logging.getLogger('arvados.arvados_fuse')
 
 # Uncomment this to enable llfuse debug logging.
-# log_handler = logging.StreamHandler()
-# llogger = logging.getLogger('llfuse')
-# llogger.addHandler(log_handler)
-# llogger.setLevel(logging.DEBUG)
+#log_handler = logging.StreamHandler()
+llogger = logging.getLogger('llfuse')
+#llogger.addHandler(log_handler)
+llogger.setLevel(logging.DEBUG)
 
 class Handle(object):
     """Connects a numeric file handle to a File or Directory object that has
@@ -287,17 +286,16 @@ class Inodes(object):
             _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
 
     def _inode_remove(self):
-        locked_ops = []
+        locked_ops = collections.deque()
         while True:
             try:
-                locked_ops.clear()
-
                 entry = self._inode_remove_queue.get(True)
                 if entry is None:
                     return
                 # Process this entry
                 _logger.debug("_inode_remove %s", entry)
-                self._inode_op(entry, locked_ops)
+                if self._inode_op(entry, locked_ops):
+                    self._inode_remove_queue.task_done()
 
                 # Drain the queue of any other entries
                 while True:
@@ -306,13 +304,15 @@ class Inodes(object):
                         if entry is None:
                             return
                         _logger.debug("_inode_remove %s", entry)
-                        self._inode_op(entry, locked_ops)
+                        if self._inode_op(entry, locked_ops):
+                            self._inode_remove_queue.task_done()
                     except queue.Empty:
                         break
 
                 with llfuse.lock:
-                    for lk in locked_ops:
-                        self._inode_op(entry, None)
+                    while len(locked_ops) > 0:
+                        if self._inode_op(locked_ops.popleft(), None):
+                            self._inode_remove_queue.task_done()
                     for entry in self.inode_cache.evict_candidates():
                         self._remove(entry)
             except Exception as e:
@@ -320,25 +320,28 @@ class Inodes(object):
 
     def _inode_op(self, op, locked_ops):
         if self._shutdown_started.is_set():
-            return
+            return True
         if op[0] == "remove":
             if locked_ops is None:
                 self._remove(op[1])
+                return True
             else:
                 locked_ops.append(op)
+                return False
         if op[0] == "invalidate_inode":
             _logger.debug("sending invalidate inode %i", op[1])
             llfuse.invalidate_inode(op[1])
+            return True
         if op[0] == "invalidate_entry":
             _logger.debug("sending invalidate to inode %i entry %s", op[1], op[2])
             llfuse.invalidate_entry(op[1], op[2])
+            return True
         if op[0] == "evict_candidates":
-            pass
+            return True
 
     def wait_remove_queue_empty(self):
         # used by tests
-        while not self._inode_remove_queue.empty():
-            time.sleep(.1)
+        self._inode_remove_queue.join()
 
     def _remove(self, entry):
         try:
@@ -362,7 +365,7 @@ class Inodes(object):
 
             forget_inode = True
             parent = self._entries.get(entry.parent_inode)
-            if parent is not None and parent.has_ref():
+            if (parent is not None and parent.has_ref()) or entry.inode == llfuse.ROOT_INODE:
                 # the parent is still referenced, so we'll keep the
                 # entry but wipe out the stuff under it
                 forget_inode = False
@@ -404,10 +407,15 @@ class Inodes(object):
             # inode and hasn't yet forgotten about it.
             self._inode_remove_queue.put(("invalidate_entry", entry.inode, native(name.encode(self.encoding))))
 
-    def clear(self):
+    def begin_shutdown(self):
         self._inode_remove_queue.put(None)
-        with llfuse.lock_released:
+        if self._inode_remove_thread is not None:
             self._inode_remove_thread.join()
+        self._inode_remove_thread = None
+
+    def clear(self):
+        with llfuse.lock_released:
+            self.begin_shutdown()
 
         self.inode_cache.clear()
 
@@ -555,23 +563,42 @@ class Operations(llfuse.Operations):
     def metric_count_func(self, opname):
         return lambda: int(self.metric_value(opname, "arvmount_fuse_operations_seconds_count"))
 
+    def begin_shutdown(self):
+        self._shutdown_started.set()
+        self.inodes.begin_shutdown()
+
     @destroy_time.time()
     @catch_exceptions
     def destroy(self):
-        self._shutdown_started.set()
-        if self.events:
-            self.events.close()
-            self.events = None
+        try:
+            _logger.error("arv-mount destroy: start")
+
+            self.begin_shutdown()
+            _logger.error("arv-mount destroy start1.1")
+
+            if self.events:
+                _logger.error("arv-mount destroy start1.2")
+                self.events.close()
+                self.events = None
+
+            _logger.error("arv-mount destroy start2")
+
+            # Different versions of llfuse require and forbid us to
+            # acquire the lock here. See #8345#note-37, #10805#note-9.
+            if LLFUSE_VERSION_0 and llfuse.lock.acquire():
+                # llfuse < 0.42
+                _logger.error("arv-mount destroy start3")
+                self.inodes.clear()
+                llfuse.lock.release()
+            else:
+                # llfuse >= 0.42
+                _logger.error("arv-mount destroy start4")
+                self.inodes.clear()
+
+            _logger.error("arv-mount destroy: complete")
+        except Exception as e:
+            _logger.exception("Error during destroy")
 
-        # Different versions of llfuse require and forbid us to
-        # acquire the lock here. See #8345#note-37, #10805#note-9.
-        if LLFUSE_VERSION_0 and llfuse.lock.acquire():
-            # llfuse < 0.42
-            self.inodes.clear()
-            llfuse.lock.release()
-        else:
-            # llfuse >= 0.42
-            self.inodes.clear()
 
     def access(self, inode, mode, ctx):
         return True
@@ -704,6 +731,7 @@ class Operations(llfuse.Operations):
     @catch_exceptions
     def forget(self, inodes):
         if self._shutdown_started.is_set():
+            _logger.debug("arv-mount forget: shutdown_started.is_set")
             return
         for inode, nlookup in inodes:
             ent = self.inodes[inode]
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 3cc30f83b1..430ae0feba 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -478,4 +478,8 @@ From here, the following directories are available:
         except:
             llfuse.close(unmount=False)
             raise
+        self.logger.error('calling begin_shutdown')
+        self.operations.begin_shutdown()
+        time.sleep(1)
+        self.logger.error('calling llfuse.close')
         llfuse.close()
diff --git a/services/fuse/tests/test_cache.py b/services/fuse/tests/test_cache.py
index 46ed0be411..5675cbfedf 100644
--- a/services/fuse/tests/test_cache.py
+++ b/services/fuse/tests/test_cache.py
@@ -37,14 +37,16 @@ class CacheTest(IntegrationTest):
 
     @staticmethod
     def _test_cache_spill(self, mnt, pdh):
+        logging.error("################################################################## WAH WAH1")
         for i,v in enumerate(pdh):
             j = os.path.join(mnt, "by_id", v, "blurg%i" % i)
             self.assertTrue(os.path.exists(j))
             j = os.path.join(mnt, "by_id", v, "dir%i/blurg" % i)
             self.assertTrue(os.path.exists(j))
-
+        logging.error("################################################################## WAH WAH2")
         for i,v in enumerate(pdh):
             j = os.path.join(mnt, "by_id", v, "blurg%i" % i)
             self.assertTrue(os.path.exists(j))
             j = os.path.join(mnt, "by_id", v, "dir%i/blurg" % i)
             self.assertTrue(os.path.exists(j))
+        logging.error("################################################################## WAH WAH3")
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index da88cf0eb9..b3bec39cc5 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -1127,7 +1127,7 @@ class MagicDirApiError(FuseMagicTest):
 class SanitizeFilenameTest(MountTestBase):
     def test_sanitize_filename(self):
         pdir = fuse.ProjectDirectory(
-            1, {}, self.api, 0, False, None,
+            1, fuse.Inodes(None), self.api, 0, False, None,
             project_object=self.api.users().current().execute(),
         )
         acceptable = [

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list