[arvados] updated: 2.7.0-6114-g506f77094e
git repository hosting
git at public.arvados.org
Mon Mar 11 21:00:57 UTC 2024
Summary of changes:
services/fuse/arvados_fuse/__init__.py | 56 ++++++++++++++++++++++---------
services/fuse/arvados_fuse/command.py | 6 ++--
services/fuse/arvados_fuse/fusedir.py | 61 ++++++++++++++++------------------
services/fuse/tests/test_inodes.py | 10 ++++--
services/fuse/tests/test_mount.py | 9 +++--
5 files changed, 85 insertions(+), 57 deletions(-)
via 506f77094efb8c55ec72adf47187b71c2170176c (commit)
via f941c9f66382861809a27bdc47da95dfd91d94ed (commit)
from 7890e45a180760b351bb0eb3ea7a237094f24ec1 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 506f77094efb8c55ec72adf47187b71c2170176c
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Mar 11 17:00:08 2024 -0400
21541: Fixed ForwardSlashNameSubstitution tests
But running the full test suite is getting segfaults, I think a thread
is running past its lifetime.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 193da88038..42f22a9ada 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -229,11 +229,13 @@ class Inodes(object):
"""Manage the set of inodes. This is the mapping from a numeric id
to a concrete File or Directory object"""
- def __init__(self, inode_cache, encoding="utf-8"):
+ def __init__(self, inode_cache, encoding="utf-8", fsns=None, shutdown_started=None):
self._entries = {}
self._counter = itertools.count(llfuse.ROOT_INODE)
self.inode_cache = inode_cache
self.encoding = encoding
+ self._fsns = fsns
+ self._shutdown_started = shutdown_started or threading.Event()
self._inode_remove_queue = queue.Queue()
self._inode_remove_thread = threading.Thread(None, self._inode_remove)
@@ -258,6 +260,7 @@ class Inodes(object):
def touch(self, entry):
entry._atime = time.time()
self.inode_cache.touch(entry)
+ self.cap_cache()
def cap_cache(self):
self._inode_remove_queue.put(("evict_candidates",))
@@ -316,6 +319,8 @@ class Inodes(object):
_logger.exception("_inode_remove")
def _inode_op(self, op, locked_ops):
+ if self._shutdown_started.is_set():
+ return
if op[0] == "remove":
if locked_ops is None:
self._remove(op[1])
@@ -330,6 +335,10 @@ class Inodes(object):
if op[0] == "evict_candidates":
pass
+ def wait_remove_queue_empty(self):
+ # used by tests
+ while not self._inode_remove_queue.empty():
+ time.sleep(.1)
def _remove(self, entry):
try:
@@ -337,8 +346,12 @@ class Inodes(object):
# Removed already
return
+ # Tell the kernel it should forget about it.
+ entry.kernel_invalidate()
+
if entry.has_ref():
- # has kernel reference, can't be removed.
+ # has kernel reference, could still be accessed.
+ # when the kernel forgets about it, we can delete it.
#_logger.debug("InodeCache cannot clear inode %i, is referenced", entry.inode)
return
@@ -363,14 +376,12 @@ class Inodes(object):
_logger.debug("InodeCache removing inode %i", entry.inode)
- # Invalidate the entry for self on the parent
- entry.kernel_invalidate()
-
# For directories, clear the contents
entry.clear()
- _logger.debug("InodeCache clearing inode %i, total %i, forget_inode %s",
- entry.inode, self.inode_cache.total(), forget_inode)
+ _logger.debug("InodeCache clearing inode %i, total %i, forget_inode %s, inode entries %i, type %s",
+ entry.inode, self.inode_cache.total(), forget_inode,
+ len(self._entries), type(entry))
if forget_inode:
del self._entries[entry.inode]
entry.inode = None
@@ -408,6 +419,9 @@ class Inodes(object):
self._entries.clear()
+ def forward_slash_subst(self):
+ return self._fsns
+
def catch_exceptions(orig_func):
"""Catch uncaught exceptions and log them consistently."""
@@ -468,14 +482,32 @@ class Operations(llfuse.Operations):
rename_time = fuse_time.labels(op='rename')
flush_time = fuse_time.labels(op='flush')
- def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
+ def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False, fsns=None):
super(Operations, self).__init__()
self._api_client = api_client
if not inode_cache:
inode_cache = InodeCache(cap=256*1024*1024)
- self.inodes = Inodes(inode_cache, encoding=encoding)
+
+ if fsns is None:
+ try:
+ fsns = self._api_client.config()["Collections"]["ForwardSlashNameSubstitution"]
+ except KeyError:
+ # old API server with no FSNS config
+ fsns = '_'
+ else:
+ if fsns == '' or fsns == '/':
+ fsns = None
+
+ # If we get overlapping shutdown events (e.g., fusermount -u
+ # -z and operations.destroy()) llfuse calls forget() on inodes
+ # that have already been deleted. To avoid this, we make
+ # forget() a no-op if called after destroy().
+ self._shutdown_started = threading.Event()
+
+ self.inodes = Inodes(inode_cache, encoding=encoding, fsns=fsns,
+ shutdown_started=self._shutdown_started)
self.uid = uid
self.gid = gid
self.enable_write = enable_write
@@ -488,12 +520,6 @@ class Operations(llfuse.Operations):
# is fully initialized should wait() on this event object.
self.initlock = threading.Event()
- # If we get overlapping shutdown events (e.g., fusermount -u
- # -z and operations.destroy()) llfuse calls forget() on inodes
- # that have already been deleted. To avoid this, we make
- # forget() a no-op if called after destroy().
- self._shutdown_started = threading.Event()
-
self.num_retries = num_retries
self.read_counter = arvados.keep.Counter()
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 715096fc2c..3cc30f83b1 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -113,6 +113,8 @@ class ArgumentParser(argparse.ArgumentParser):
self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
+ self.add_argument('--fsns', type=str, default=None, help=argparse.SUPPRESS)
+
unmount = self.add_mutually_exclusive_group()
unmount.add_argument('--unmount', action='store_true', default=False,
help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit. If --subtype is given, unmount only if the mount has the specified subtype. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
@@ -298,7 +300,8 @@ class Mount(object):
api_client=self.api,
encoding=self.args.encoding,
inode_cache=InodeCache(cap=self.args.directory_cache),
- enable_write=self.args.enable_write)
+ enable_write=self.args.enable_write,
+ fsns=self.args.fsns)
if self.args.crunchstat_interval:
statsthread = threading.Thread(
@@ -387,7 +390,6 @@ class Mount(object):
e = self.operations.inodes.add_entry(Directory(
llfuse.ROOT_INODE,
self.operations.inodes,
- self.api.config(),
self.args.enable_write,
self.args.filters,
))
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index e41966b329..ffdce581f8 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -36,7 +36,7 @@ class Directory(FreshBase):
and the value referencing a File or Directory object.
"""
- def __init__(self, parent_inode, inodes, apiconfig, enable_write, filters):
+ def __init__(self, parent_inode, inodes, enable_write, filters):
"""parent_inode is the integer inode number"""
super(Directory, self).__init__()
@@ -46,7 +46,6 @@ class Directory(FreshBase):
raise Exception("parent_inode should be an int")
self.parent_inode = parent_inode
self.inodes = inodes
- self.apiconfig = apiconfig
self._entries = {}
self._mtime = time.time()
self._enable_write = enable_write
@@ -64,23 +63,9 @@ class Directory(FreshBase):
else:
yield [f_name, *f[1:]]
- def forward_slash_subst(self):
- if not hasattr(self, '_fsns'):
- self._fsns = None
- config = self.apiconfig
- try:
- self._fsns = config["Collections"]["ForwardSlashNameSubstitution"]
- except KeyError:
- # old API server with no FSNS config
- self._fsns = '_'
- else:
- if self._fsns == '' or self._fsns == '/':
- self._fsns = None
- return self._fsns
-
def unsanitize_filename(self, incoming):
"""Replace ForwardSlashNameSubstitution value with /"""
- fsns = self.forward_slash_subst()
+ fsns = self.inodes.forward_slash_subst()
if isinstance(fsns, str):
return incoming.replace(fsns, '/')
else:
@@ -99,7 +84,7 @@ class Directory(FreshBase):
elif dirty == '..':
return '__'
else:
- fsns = self.forward_slash_subst()
+ fsns = self.inodes.forward_slash_subst()
if isinstance(fsns, str):
dirty = dirty.replace('/', fsns)
return _disallowed_filename_characters.sub('_', dirty)
@@ -249,6 +234,8 @@ class Directory(FreshBase):
self.inodes.invalidate_entry(parent, k)
break
+ self.inodes.invalidate_inode(self)
+
def mtime(self):
return self._mtime
@@ -292,9 +279,8 @@ class CollectionDirectoryBase(Directory):
"""
- def __init__(self, parent_inode, inodes, apiconfig, enable_write, filters, collection, collection_root):
- super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig, enable_write, filters)
- self.apiconfig = apiconfig
+ def __init__(self, parent_inode, inodes, enable_write, filters, collection, collection_root):
+ super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, enable_write, filters)
self.collection = collection
self.collection_root = collection_root
self.collection_record_file = None
@@ -312,7 +298,6 @@ class CollectionDirectoryBase(Directory):
self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(
self.inode,
self.inodes,
- self.apiconfig,
self._enable_write,
self._filters,
item,
@@ -469,7 +454,7 @@ class CollectionDirectory(CollectionDirectoryBase):
"""Represents the root of a directory tree representing a collection."""
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters=None, collection_record=None, explicit_collection=None):
- super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters, None, self)
+ super(CollectionDirectory, self).__init__(parent_inode, inodes, enable_write, filters, None, self)
self.api = api
self.num_retries = num_retries
self._poll = True
@@ -654,9 +639,8 @@ class CollectionDirectory(CollectionDirectoryBase):
def clear(self):
if self.collection is not None:
self.collection.stop_threads()
- super(CollectionDirectory, self).clear()
self._manifest_size = 0
- self.inodes.inode_cache.update_cache_size(self)
+ super(CollectionDirectory, self).clear()
class TmpCollectionDirectory(CollectionDirectoryBase):
@@ -683,7 +667,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
# This is always enable_write=True because it never tries to
# save to the backend
super(TmpCollectionDirectory, self).__init__(
- parent_inode, inodes, api_client.config, True, filters, collection, self)
+ parent_inode, inodes, True, filters, collection, self)
self.populate(self.mtime())
def on_event(self, *args, **kwargs):
@@ -780,7 +764,7 @@ and the directory will appear if it exists.
""".lstrip()
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, pdh_only=False, storage_classes=None):
- super(MagicDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+ super(MagicDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.pdh_only = pdh_only
@@ -879,7 +863,7 @@ class TagsDirectory(Directory):
"""A special directory that contains as subdirectories all tags visible to the user."""
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, poll_time=60):
- super(TagsDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+ super(TagsDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self._poll = True
@@ -959,7 +943,7 @@ class TagDirectory(Directory):
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, tag,
poll=False, poll_time=60):
- super(TagDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+ super(TagDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.tag = tag
@@ -1002,7 +986,7 @@ class ProjectDirectory(Directory):
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
project_object, poll=True, poll_time=3, storage_classes=None):
- super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+ super(ProjectDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.project_object = project_object
@@ -1326,7 +1310,7 @@ class SharedDirectory(Directory):
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
exclude, poll=False, poll_time=60, storage_classes=None):
- super(SharedDirectory, self).__init__(parent_inode, inodes, api.config(), enable_write, filters)
+ super(SharedDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.current_user = api.users().current().execute(num_retries=num_retries)
diff --git a/services/fuse/tests/test_inodes.py b/services/fuse/tests/test_inodes.py
index 07e6036d08..c19a30195e 100644
--- a/services/fuse/tests/test_inodes.py
+++ b/services/fuse/tests/test_inodes.py
@@ -68,19 +68,21 @@ class InodeTests(unittest.TestCase):
inodes.add_entry(ent3)
# Won't clear anything because min_entries = 4
- self.assertEqual(2, len(cache._entries))
+ self.assertEqual(2, len(cache._cache_entries))
self.assertFalse(ent1.clear.called)
self.assertEqual(1100, cache.total())
# Change min_entries
cache.min_entries = 1
- cache.cap_cache()
+ inodes.cap_cache()
+ inodes.wait_remove_queue_empty()
self.assertEqual(600, cache.total())
self.assertTrue(ent1.clear.called)
# Touching ent1 should cause ent3 to get cleared
self.assertFalse(ent3.clear.called)
- cache.touch(ent1)
+ inodes.touch(ent1)
+ inodes.wait_remove_queue_empty()
self.assertTrue(ent3.clear.called)
self.assertEqual(500, cache.total())
@@ -121,6 +123,7 @@ class InodeTests(unittest.TestCase):
ent1.clear.called = False
ent3.clear.called = False
cache.touch(ent3)
+ inodes.wait_remove_queue_empty()
self.assertFalse(ent1.clear.called)
self.assertTrue(ent3.clear.called)
self.assertEqual(500, cache.total())
@@ -147,6 +150,7 @@ class InodeTests(unittest.TestCase):
ent1.ref_count = 0
with llfuse.lock:
inodes.del_entry(ent1)
+ inodes.wait_remove_queue_empty()
self.assertEqual(0, cache.total())
cache.touch(ent3)
self.assertEqual(600, cache.total())
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index ef9c25bcf5..da88cf0eb9 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -1227,23 +1227,22 @@ class SlashSubstitutionTest(IntegrationTest):
mnt_args = [
'--read-write',
'--mount-home', 'zzz',
+ '--fsns', '[SLASH]'
]
def setUp(self):
super(SlashSubstitutionTest, self).setUp()
+
self.api = arvados.safeapi.ThreadSafeApiCache(
arvados.config.settings(),
- version='v1',
+ version='v1'
)
- self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute()
self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute()
self.fusename = 'foo[SLASH]bar[SLASH]baz'
@IntegrationTest.mount(argv=mnt_args)
- @mock.patch('arvados.util.get_config_once')
- def test_slash_substitution_before_listing(self, get_config_once):
- get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+ def test_slash_substitution_before_listing(self):
self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
self.checkContents()
@staticmethod
commit f941c9f66382861809a27bdc47da95dfd91d94ed
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Mar 11 09:57:17 2024 -0400
21541: Add check for filter groups containing themselves
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index fe1db416f1..e41966b329 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -1014,6 +1014,19 @@ class ProjectDirectory(Directory):
self._current_user = None
self._full_listing = False
self.storage_classes = storage_classes
+ self.recursively_contained = False
+
+ # Filter groups can contain themselves, which causes tools
+ # that walk the filesystem to get stuck in an infinite loop,
+ # so suppress returning a listing in that case.
+ if self.project_object.get("group_class") == "filter":
+ iter_parent_inode = parent_inode
+ while iter_parent_inode != llfuse.ROOT_INODE:
+ parent_dir = self.inodes[iter_parent_inode]
+ if isinstance(parent_dir, ProjectDirectory) and parent_dir.project_uuid == self.project_uuid:
+ self.recursively_contained = True
+ break
+ iter_parent_inode = parent_dir.parent_inode
def want_event_subscribe(self):
return True
@@ -1064,7 +1077,7 @@ class ProjectDirectory(Directory):
self.project_object_file = ObjectFile(self.inode, self.project_object)
self.inodes.add_entry(self.project_object_file)
- if not self._full_listing:
+ if self.recursively_contained or not self._full_listing:
return True
def samefn(a, i):
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list