[arvados] updated: 2.7.0-6114-g506f77094e

git repository hosting git at public.arvados.org
Mon Mar 11 21:00:57 UTC 2024


Summary of changes:
 services/fuse/arvados_fuse/__init__.py | 56 ++++++++++++++++++++++---------
 services/fuse/arvados_fuse/command.py  |  6 ++--
 services/fuse/arvados_fuse/fusedir.py  | 61 ++++++++++++++++------------------
 services/fuse/tests/test_inodes.py     | 10 ++++--
 services/fuse/tests/test_mount.py      |  9 +++--
 5 files changed, 85 insertions(+), 57 deletions(-)

       via  506f77094efb8c55ec72adf47187b71c2170176c (commit)
       via  f941c9f66382861809a27bdc47da95dfd91d94ed (commit)
      from  7890e45a180760b351bb0eb3ea7a237094f24ec1 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 506f77094efb8c55ec72adf47187b71c2170176c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Mar 11 17:00:08 2024 -0400

    21541: Fixed ForwardSlashNameSubstitution tests
    
    But running the full test suite is getting segfaults, I think a thread
    is running past its lifetime.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 193da88038..42f22a9ada 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -229,11 +229,13 @@ class Inodes(object):
     """Manage the set of inodes.  This is the mapping from a numeric id
     to a concrete File or Directory object"""
 
-    def __init__(self, inode_cache, encoding="utf-8"):
+    def __init__(self, inode_cache, encoding="utf-8", fsns=None, shutdown_started=None):
         self._entries = {}
         self._counter = itertools.count(llfuse.ROOT_INODE)
         self.inode_cache = inode_cache
         self.encoding = encoding
+        self._fsns = fsns
+        self._shutdown_started = shutdown_started or threading.Event()
 
         self._inode_remove_queue = queue.Queue()
         self._inode_remove_thread = threading.Thread(None, self._inode_remove)
@@ -258,6 +260,7 @@ class Inodes(object):
     def touch(self, entry):
         entry._atime = time.time()
         self.inode_cache.touch(entry)
+        self.cap_cache()
 
     def cap_cache(self):
         self._inode_remove_queue.put(("evict_candidates",))
@@ -316,6 +319,8 @@ class Inodes(object):
                 _logger.exception("_inode_remove")
 
     def _inode_op(self, op, locked_ops):
+        if self._shutdown_started.is_set():
+            return
         if op[0] == "remove":
             if locked_ops is None:
                 self._remove(op[1])
@@ -330,6 +335,10 @@ class Inodes(object):
         if op[0] == "evict_candidates":
             pass
 
+    def wait_remove_queue_empty(self):
+        # used by tests
+        while not self._inode_remove_queue.empty():
+            time.sleep(.1)
 
     def _remove(self, entry):
         try:
@@ -337,8 +346,12 @@ class Inodes(object):
                 # Removed already
                 return
 
+            # Tell the kernel it should forget about it.
+            entry.kernel_invalidate()
+
             if entry.has_ref():
-                # has kernel reference, can't be removed.
+                # has kernel reference, could still be accessed.
+                # when the kernel forgets about it, we can delete it.
                 #_logger.debug("InodeCache cannot clear inode %i, is referenced", entry.inode)
                 return
 
@@ -363,14 +376,12 @@ class Inodes(object):
 
             _logger.debug("InodeCache removing inode %i", entry.inode)
 
-            # Invalidate the entry for self on the parent
-            entry.kernel_invalidate()
-
             # For directories, clear the contents
             entry.clear()
 
-            _logger.debug("InodeCache clearing inode %i, total %i, forget_inode %s",
-                          entry.inode, self.inode_cache.total(), forget_inode)
+            _logger.debug("InodeCache clearing inode %i, total %i, forget_inode %s, inode entries %i, type %s",
+                          entry.inode, self.inode_cache.total(), forget_inode,
+                          len(self._entries), type(entry))
             if forget_inode:
                 del self._entries[entry.inode]
                 entry.inode = None
@@ -408,6 +419,9 @@ class Inodes(object):
 
         self._entries.clear()
 
+    def forward_slash_subst(self):
+        return self._fsns
+
 
 def catch_exceptions(orig_func):
     """Catch uncaught exceptions and log them consistently."""
@@ -468,14 +482,32 @@ class Operations(llfuse.Operations):
     rename_time = fuse_time.labels(op='rename')
     flush_time = fuse_time.labels(op='flush')
 
-    def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
+    def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False, fsns=None):
         super(Operations, self).__init__()
 
         self._api_client = api_client
 
         if not inode_cache:
             inode_cache = InodeCache(cap=256*1024*1024)
-        self.inodes = Inodes(inode_cache, encoding=encoding)
+
+        if fsns is None:
+            try:
+                fsns = self._api_client.config()["Collections"]["ForwardSlashNameSubstitution"]
+            except KeyError:
+                # old API server with no FSNS config
+                fsns = '_'
+            else:
+                if fsns == '' or fsns == '/':
+                    fsns = None
+
+        # If we get overlapping shutdown events (e.g., fusermount -u
+        # -z and operations.destroy()) llfuse calls forget() on inodes
+        # that have already been deleted. To avoid this, we make
+        # forget() a no-op if called after destroy().
+        self._shutdown_started = threading.Event()
+
+        self.inodes = Inodes(inode_cache, encoding=encoding, fsns=fsns,
+                             shutdown_started=self._shutdown_started)
         self.uid = uid
         self.gid = gid
         self.enable_write = enable_write
@@ -488,12 +520,6 @@ class Operations(llfuse.Operations):
         # is fully initialized should wait() on this event object.
         self.initlock = threading.Event()
 
-        # If we get overlapping shutdown events (e.g., fusermount -u
-        # -z and operations.destroy()) llfuse calls forget() on inodes
-        # that have already been deleted. To avoid this, we make
-        # forget() a no-op if called after destroy().
-        self._shutdown_started = threading.Event()
-
         self.num_retries = num_retries
 
         self.read_counter = arvados.keep.Counter()
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 715096fc2c..3cc30f83b1 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -113,6 +113,8 @@ class ArgumentParser(argparse.ArgumentParser):
 
         self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
 
+        self.add_argument('--fsns', type=str, default=None, help=argparse.SUPPRESS)
+
         unmount = self.add_mutually_exclusive_group()
         unmount.add_argument('--unmount', action='store_true', default=False,
                              help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit. If --subtype is given, unmount only if the mount has the specified subtype. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
@@ -298,7 +300,8 @@ class Mount(object):
             api_client=self.api,
             encoding=self.args.encoding,
             inode_cache=InodeCache(cap=self.args.directory_cache),
-            enable_write=self.args.enable_write)
+            enable_write=self.args.enable_write,
+            fsns=self.args.fsns)
 
         if self.args.crunchstat_interval:
             statsthread = threading.Thread(
@@ -387,7 +390,6 @@ class Mount(object):
         e = self.operations.inodes.add_entry(Directory(
             llfuse.ROOT_INODE,
             self.operations.inodes,
-            self.api.config(),
             self.args.enable_write,
             self.args.filters,
         ))
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index e41966b329..ffdce581f8 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -36,7 +36,7 @@ class Directory(FreshBase):
     and the value referencing a File or Directory object.
     """
 
-    def __init__(self, parent_inode, inodes, apiconfig, enable_write, filters):
+    def __init__(self, parent_inode, inodes, enable_write, filters):
         """parent_inode is the integer inode number"""
 
         super(Directory, self).__init__()
@@ -46,7 +46,6 @@ class Directory(FreshBase):
             raise Exception("parent_inode should be an int")
         self.parent_inode = parent_inode
         self.inodes = inodes
-        self.apiconfig = apiconfig
         self._entries = {}
         self._mtime = time.time()
         self._enable_write = enable_write
@@ -64,23 +63,9 @@ class Directory(FreshBase):
             else:
                 yield [f_name, *f[1:]]
 
-    def forward_slash_subst(self):
-        if not hasattr(self, '_fsns'):
-            self._fsns = None
-            config = self.apiconfig
-            try:
-                self._fsns = config["Collections"]["ForwardSlashNameSubstitution"]
-            except KeyError:
-                # old API server with no FSNS config
-                self._fsns = '_'
-            else:
-                if self._fsns == '' or self._fsns == '/':
-                    self._fsns = None
-        return self._fsns
-
     def unsanitize_filename(self, incoming):
         """Replace ForwardSlashNameSubstitution value with /"""
-        fsns = self.forward_slash_subst()
+        fsns = self.inodes.forward_slash_subst()
         if isinstance(fsns, str):
             return incoming.replace(fsns, '/')
         else:
@@ -99,7 +84,7 @@ class Directory(FreshBase):
         elif dirty == '..':
             return '__'
         else:
-            fsns = self.forward_slash_subst()
+            fsns = self.inodes.forward_slash_subst()
             if isinstance(fsns, str):
                 dirty = dirty.replace('/', fsns)
             return _disallowed_filename_characters.sub('_', dirty)
@@ -249,6 +234,8 @@ class Directory(FreshBase):
                 self.inodes.invalidate_entry(parent, k)
                 break
 
+        self.inodes.invalidate_inode(self)
+
     def mtime(self):
         return self._mtime
 
@@ -292,9 +279,8 @@ class CollectionDirectoryBase(Directory):
 
     """
 
-    def __init__(self, parent_inode, inodes, apiconfig, enable_write, filters, collection, collection_root):
-        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig, enable_write, filters)
-        self.apiconfig = apiconfig
+    def __init__(self, parent_inode, inodes, enable_write, filters, collection, collection_root):
+        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, enable_write, filters)
         self.collection = collection
         self.collection_root = collection_root
         self.collection_record_file = None
@@ -312,7 +298,6 @@ class CollectionDirectoryBase(Directory):
             self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(
                 self.inode,
                 self.inodes,
-                self.apiconfig,
                 self._enable_write,
                 self._filters,
                 item,
@@ -469,7 +454,7 @@ class CollectionDirectory(CollectionDirectoryBase):
     """Represents the root of a directory tree representing a collection."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters=None, collection_record=None, explicit_collection=None):
-        super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters, None, self)
+        super(CollectionDirectory, self).__init__(parent_inode, inodes, enable_write, filters, None, self)
         self.api = api
         self.num_retries = num_retries
         self._poll = True
@@ -654,9 +639,8 @@ class CollectionDirectory(CollectionDirectoryBase):
     def clear(self):
         if self.collection is not None:
             self.collection.stop_threads()
-        super(CollectionDirectory, self).clear()
         self._manifest_size = 0
-        self.inodes.inode_cache.update_cache_size(self)
+        super(CollectionDirectory, self).clear()
 
 
 class TmpCollectionDirectory(CollectionDirectoryBase):
@@ -683,7 +667,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
         # This is always enable_write=True because it never tries to
         # save to the backend
         super(TmpCollectionDirectory, self).__init__(
-            parent_inode, inodes, api_client.config, True, filters, collection, self)
+            parent_inode, inodes, True, filters, collection, self)
         self.populate(self.mtime())
 
     def on_event(self, *args, **kwargs):
@@ -780,7 +764,7 @@ and the directory will appear if it exists.
 """.lstrip()
 
     def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, pdh_only=False, storage_classes=None):
-        super(MagicDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+        super(MagicDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
         self.api = api
         self.num_retries = num_retries
         self.pdh_only = pdh_only
@@ -879,7 +863,7 @@ class TagsDirectory(Directory):
     """A special directory that contains as subdirectories all tags visible to the user."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, poll_time=60):
-        super(TagsDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+        super(TagsDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
         self.api = api
         self.num_retries = num_retries
         self._poll = True
@@ -959,7 +943,7 @@ class TagDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, tag,
                  poll=False, poll_time=60):
-        super(TagDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+        super(TagDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
         self.api = api
         self.num_retries = num_retries
         self.tag = tag
@@ -1002,7 +986,7 @@ class ProjectDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
                  project_object, poll=True, poll_time=3, storage_classes=None):
-        super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+        super(ProjectDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
         self.api = api
         self.num_retries = num_retries
         self.project_object = project_object
@@ -1326,7 +1310,7 @@ class SharedDirectory(Directory):
 
     def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
                  exclude, poll=False, poll_time=60, storage_classes=None):
-        super(SharedDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+        super(SharedDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
         self.api = api
         self.num_retries = num_retries
         self.current_user = api.users().current().execute(num_retries=num_retries)
diff --git a/services/fuse/tests/test_inodes.py b/services/fuse/tests/test_inodes.py
index 07e6036d08..c19a30195e 100644
--- a/services/fuse/tests/test_inodes.py
+++ b/services/fuse/tests/test_inodes.py
@@ -68,19 +68,21 @@ class InodeTests(unittest.TestCase):
         inodes.add_entry(ent3)
 
         # Won't clear anything because min_entries = 4
-        self.assertEqual(2, len(cache._entries))
+        self.assertEqual(2, len(cache._cache_entries))
         self.assertFalse(ent1.clear.called)
         self.assertEqual(1100, cache.total())
 
         # Change min_entries
         cache.min_entries = 1
-        cache.cap_cache()
+        inodes.cap_cache()
+        inodes.wait_remove_queue_empty()
         self.assertEqual(600, cache.total())
         self.assertTrue(ent1.clear.called)
 
         # Touching ent1 should cause ent3 to get cleared
         self.assertFalse(ent3.clear.called)
-        cache.touch(ent1)
+        inodes.touch(ent1)
+        inodes.wait_remove_queue_empty()
         self.assertTrue(ent3.clear.called)
         self.assertEqual(500, cache.total())
 
@@ -121,6 +123,7 @@ class InodeTests(unittest.TestCase):
         ent1.clear.called = False
         ent3.clear.called = False
         cache.touch(ent3)
+        inodes.wait_remove_queue_empty()
         self.assertFalse(ent1.clear.called)
         self.assertTrue(ent3.clear.called)
         self.assertEqual(500, cache.total())
@@ -147,6 +150,7 @@ class InodeTests(unittest.TestCase):
         ent1.ref_count = 0
         with llfuse.lock:
             inodes.del_entry(ent1)
+        inodes.wait_remove_queue_empty()
         self.assertEqual(0, cache.total())
         cache.touch(ent3)
         self.assertEqual(600, cache.total())
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index ef9c25bcf5..da88cf0eb9 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -1227,23 +1227,22 @@ class SlashSubstitutionTest(IntegrationTest):
     mnt_args = [
         '--read-write',
         '--mount-home', 'zzz',
+        '--fsns', '[SLASH]'
     ]
 
     def setUp(self):
         super(SlashSubstitutionTest, self).setUp()
+
         self.api = arvados.safeapi.ThreadSafeApiCache(
             arvados.config.settings(),
-            version='v1',
+            version='v1'
         )
-        self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
         self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute()
         self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute()
         self.fusename = 'foo[SLASH]bar[SLASH]baz'
 
     @IntegrationTest.mount(argv=mnt_args)
-    @mock.patch('arvados.util.get_config_once')
-    def test_slash_substitution_before_listing(self, get_config_once):
-        get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+    def test_slash_substitution_before_listing(self):
         self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
         self.checkContents()
     @staticmethod

commit f941c9f66382861809a27bdc47da95dfd91d94ed
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Mar 11 09:57:17 2024 -0400

    21541: Add check for filter groups containing themselves
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index fe1db416f1..e41966b329 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -1014,6 +1014,19 @@ class ProjectDirectory(Directory):
         self._current_user = None
         self._full_listing = False
         self.storage_classes = storage_classes
+        self.recursively_contained = False
+
+        # Filter groups can contain themselves, which causes tools
+        # that walk the filesystem to get stuck in an infinite loop,
+        # so suppress returning a listing in that case.
+        if self.project_object.get("group_class") == "filter":
+            iter_parent_inode = parent_inode
+            while iter_parent_inode != llfuse.ROOT_INODE:
+                parent_dir = self.inodes[iter_parent_inode]
+                if isinstance(parent_dir, ProjectDirectory) and parent_dir.project_uuid == self.project_uuid:
+                    self.recursively_contained = True
+                    break
+                iter_parent_inode = parent_dir.parent_inode
 
     def want_event_subscribe(self):
         return True
@@ -1064,7 +1077,7 @@ class ProjectDirectory(Directory):
             self.project_object_file = ObjectFile(self.inode, self.project_object)
             self.inodes.add_entry(self.project_object_file)
 
-        if not self._full_listing:
+        if self.recursively_contained or not self._full_listing:
             return True
 
         def samefn(a, i):

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list