[arvados] updated: 2.7.1-49-g0c5bbc10c7
git repository hosting
git at public.arvados.org
Tue Apr 2 19:52:06 UTC 2024
Summary of changes:
doc/_config.yml | 15 +-
.../install-shell-server.html.textile.liquid | 2 +-
doc/sdk/cli/index.html.textile.liquid | 2 +-
doc/sdk/cli/install.html.textile.liquid | 2 +-
doc/sdk/cli/reference.html.textile.liquid | 2 +-
doc/sdk/cli/subcommands.html.textile.liquid | 2 +-
doc/sdk/fuse/install.html.textile.liquid | 42 ++
doc/sdk/fuse/options.html.textile.liquid | 193 +++++++
doc/sdk/index.html.textile.liquid | 15 +-
doc/sdk/java-v2/example.html.textile.liquid | 2 +-
doc/sdk/java-v2/index.html.textile.liquid | 2 +-
doc/sdk/java-v2/javadoc.html.textile.liquid | 2 +-
doc/sdk/python/arvados-fuse.html.textile.liquid | 48 --
.../getting_started/setup-cli.html.textile.liquid | 2 +-
lib/crunchrun/crunchrun.go | 13 +-
lib/crunchrun/integration_test.go | 2 +
services/fuse/arvados_fuse/__init__.py | 555 +++++++++++++++------
services/fuse/arvados_fuse/command.py | 429 ++++++++++++----
services/fuse/arvados_fuse/fresh.py | 15 +-
services/fuse/arvados_fuse/fusedir.py | 201 +++++---
services/fuse/arvados_fuse/fusefile.py | 13 +
services/fuse/setup.py | 2 +-
services/fuse/tests/integration_test.py | 2 +-
services/fuse/tests/mount_test_base.py | 4 +-
services/fuse/tests/test_inodes.py | 37 +-
services/fuse/tests/test_mount.py | 11 +-
services/fuse/tests/test_unmount.py | 4 +-
27 files changed, 1183 insertions(+), 436 deletions(-)
create mode 100644 doc/sdk/fuse/install.html.textile.liquid
create mode 100644 doc/sdk/fuse/options.html.textile.liquid
delete mode 100644 doc/sdk/python/arvados-fuse.html.textile.liquid
via 0c5bbc10c79f5ed4a67c475762fd2e1c574a9b71 (commit)
via e16cc17af3c60785f12fd6c3ecf01af0be74842b (commit)
via 35b218008f0fa6b700c50ab33324b7d25fc043a6 (commit)
from 157e992b3d152a98ba1a2d6f242a3a6c87da3cd8 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 0c5bbc10c79f5ed4a67c475762fd2e1c574a9b71
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Tue Apr 2 11:26:33 2024 -0400
Merge branch '21541-arv-mount-keyerror-rebase' refs #21541
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 31afcda8d1..d827aefab7 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -47,16 +47,15 @@ The general FUSE operation flow is as follows:
The FUSE driver supports the Arvados event bus. When an event is received for
an object that is live in the inode cache, that object is immediately updated.
+Implementation note: in the code, the terms 'object', 'entry' and
+'inode' are used somewhat interchangeably, but generally mean an
+arvados_fuse.File or arvados_fuse.Directory object which has numeric
+inode assigned to it and appears in the Inodes._entries dictionary.
+
"""
from __future__ import absolute_import
from __future__ import division
-from future.utils import viewitems
-from future.utils import native
-from future.utils import listvalues
-from future.utils import listitems
-from future import standard_library
-standard_library.install_aliases()
from builtins import next
from builtins import str
from builtins import object
@@ -76,22 +75,11 @@ import functools
import arvados.keep
from prometheus_client import Summary
import queue
-
-# Default _notify_queue has a limit of 1000 items, but it really needs to be
-# unlimited to avoid deadlocks, see https://arvados.org/issues/3198#note-43 for
-# details.
-
-if hasattr(llfuse, 'capi'):
- # llfuse < 0.42
- llfuse.capi._notify_queue = queue.Queue()
-else:
- # llfuse >= 0.42
- llfuse._notify_queue = queue.Queue()
-
-LLFUSE_VERSION_0 = llfuse.__version__.startswith('0')
+from dataclasses import dataclass
+import typing
from .fusedir import Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
-from .fusefile import StringFile, FuseArvadosFile
+from .fusefile import File, StringFile, FuseArvadosFile
_logger = logging.getLogger('arvados.arvados_fuse')
@@ -128,28 +116,47 @@ class FileHandle(Handle):
class DirectoryHandle(Handle):
"""Connects a numeric file handle to a Directory object that has
- been opened by the client."""
+ been opened by the client.
+
+ DirectoryHandle is used by opendir() and readdir() to get
+ directory listings. Entries returned by readdir() don't increment
+ the lookup count (kernel references), so increment our internal
+ "use count" to avoid having an item being removed mid-read.
+
+ """
def __init__(self, fh, dirobj, entries):
super(DirectoryHandle, self).__init__(fh, dirobj)
self.entries = entries
+ for ent in self.entries:
+ ent[1].inc_use()
+
+ def release(self):
+ for ent in self.entries:
+ ent[1].dec_use()
+ super(DirectoryHandle, self).release()
+
class InodeCache(object):
"""Records the memory footprint of objects and when they are last used.
- When the cache limit is exceeded, the least recently used objects are
- cleared. Clearing the object means discarding its contents to release
- memory. The next time the object is accessed, it must be re-fetched from
- the server. Note that the inode cache limit is a soft limit; the cache
- limit may be exceeded if necessary to load very large objects, it may also
- be exceeded if open file handles prevent objects from being cleared.
+ When the cache limit is exceeded, the least recently used objects
+ are cleared. Clearing the object means discarding its contents to
+ release memory. The next time the object is accessed, it must be
+ re-fetched from the server. Note that the inode cache limit is a
+ soft limit; the cache limit may be exceeded if necessary to load
+ very large projects or collections, it may also be exceeded if an
+ inode can't be safely discarded based on kernel lookups
+ (has_ref()) or internal use count (in_use()).
"""
def __init__(self, cap, min_entries=4):
- self._entries = collections.OrderedDict()
- self._by_uuid = {}
+ # Standard dictionaries are ordered, but OrderedDict is still better here, see
+ # https://docs.python.org/3.11/library/collections.html#ordereddict-objects
+ # specifically we use move_to_end() which standard dicts don't have.
+ self._cache_entries = collections.OrderedDict()
self.cap = cap
self._total = 0
self.min_entries = min_entries
@@ -157,104 +164,148 @@ class InodeCache(object):
def total(self):
return self._total
- def _remove(self, obj, clear):
- if clear:
- # Kernel behavior seems to be that if a file is
- # referenced, its parents remain referenced too. This
- # means has_ref() exits early when a collection is not
- # candidate for eviction.
- #
- # By contrast, in_use() doesn't increment references on
- # parents, so it requires a full tree walk to determine if
- # a collection is a candidate for eviction. This takes
- # .07s for 240000 files, which becomes a major drag when
- # cap_cache is being called several times a second and
- # there are multiple non-evictable collections in the
- # cache.
- #
- # So it is important for performance that we do the
- # has_ref() check first.
-
- if obj.has_ref(True):
- _logger.debug("InodeCache cannot clear inode %i, still referenced", obj.inode)
- return
+ def evict_candidates(self):
+ """Yield entries that are candidates to be evicted
+ and stop when the cache total has shrunk sufficiently.
- if obj.in_use():
- _logger.debug("InodeCache cannot clear inode %i, in use", obj.inode)
- return
+ Implements a LRU cache, when an item is added or touch()ed it
+ goes to the back of the OrderedDict, so items in the front are
+ oldest. The Inodes._remove() function determines if the entry
+ can actually be removed safely.
- obj.kernel_invalidate()
- _logger.debug("InodeCache sent kernel invalidate inode %i", obj.inode)
- obj.clear()
+ """
- # The llfuse lock is released in del_entry(), which is called by
- # Directory.clear(). While the llfuse lock is released, it can happen
- # that a reentrant call removes this entry before this call gets to it.
- # Ensure that the entry is still valid before trying to remove it.
- if obj.inode not in self._entries:
+ if self._total <= self.cap:
return
- self._total -= obj.cache_size
- del self._entries[obj.inode]
- if obj.cache_uuid:
- self._by_uuid[obj.cache_uuid].remove(obj)
- if not self._by_uuid[obj.cache_uuid]:
- del self._by_uuid[obj.cache_uuid]
- obj.cache_uuid = None
- if clear:
- _logger.debug("InodeCache cleared inode %i total now %i", obj.inode, self._total)
+ _logger.debug("InodeCache evict_candidates total %i cap %i entries %i", self._total, self.cap, len(self._cache_entries))
- def cap_cache(self):
- if self._total > self.cap:
- for ent in listvalues(self._entries):
- if self._total < self.cap or len(self._entries) < self.min_entries:
- break
- self._remove(ent, True)
-
- def manage(self, obj):
- if obj.persisted():
- obj.cache_size = obj.objsize()
- self._entries[obj.inode] = obj
- obj.cache_uuid = obj.uuid()
- if obj.cache_uuid:
- if obj.cache_uuid not in self._by_uuid:
- self._by_uuid[obj.cache_uuid] = [obj]
- else:
- if obj not in self._by_uuid[obj.cache_uuid]:
- self._by_uuid[obj.cache_uuid].append(obj)
- self._total += obj.objsize()
- _logger.debug("InodeCache touched inode %i (size %i) (uuid %s) total now %i (%i entries)",
- obj.inode, obj.objsize(), obj.cache_uuid, self._total, len(self._entries))
- self.cap_cache()
+ # Copy this into a deque for two reasons:
+ #
+ # 1. _cache_entries is modified by unmanage() which is called
+ # by _remove
+ #
+ # 2. popping off the front means the reference goes away
+ # immediately intead of sticking around for the lifetime of
+ # "values"
+ values = collections.deque(self._cache_entries.values())
- def touch(self, obj):
- if obj.persisted():
- if obj.inode in self._entries:
- self._remove(obj, False)
- self.manage(obj)
+ while values:
+ if self._total < self.cap or len(self._cache_entries) < self.min_entries:
+ break
+ yield values.popleft()
- def unmanage(self, obj):
- if obj.persisted() and obj.inode in self._entries:
- self._remove(obj, True)
+ def unmanage(self, entry):
+ """Stop managing an object in the cache.
- def find_by_uuid(self, uuid):
- return self._by_uuid.get(uuid, [])
+ This happens when an object is being removed from the inode
+ entries table.
+
+ """
+
+ if entry.inode not in self._cache_entries:
+ return
+
+ # manage cache size running sum
+ self._total -= entry.cache_size
+ entry.cache_size = 0
+
+ # Now forget about it
+ del self._cache_entries[entry.inode]
+
+ def update_cache_size(self, obj):
+ """Update the cache total in response to the footprint of an
+ object changing (usually because it has been loaded or
+ cleared).
+
+ Adds or removes entries to the cache list based on the object
+ cache size.
+
+ """
+
+ if not obj.persisted():
+ return
+
+ if obj.inode in self._cache_entries:
+ self._total -= obj.cache_size
+
+ obj.cache_size = obj.objsize()
+
+ if obj.cache_size > 0 or obj.parent_inode is None:
+ self._total += obj.cache_size
+ self._cache_entries[obj.inode] = obj
+ elif obj.cache_size == 0 and obj.inode in self._cache_entries:
+ del self._cache_entries[obj.inode]
+
+ def touch(self, obj):
+ """Indicate an object was used recently, making it low
+ priority to be removed from the cache.
+
+ """
+ if obj.inode in self._cache_entries:
+ self._cache_entries.move_to_end(obj.inode)
+ return True
+ return False
def clear(self):
- self._entries.clear()
- self._by_uuid.clear()
+ self._cache_entries.clear()
self._total = 0
+ at dataclass
+class RemoveInode:
+ entry: typing.Union[Directory, File]
+ def inode_op(self, inodes, locked_ops):
+ if locked_ops is None:
+ inodes._remove(self.entry)
+ return True
+ else:
+ locked_ops.append(self)
+ return False
+
+ at dataclass
+class InvalidateInode:
+ inode: int
+ def inode_op(self, inodes, locked_ops):
+ llfuse.invalidate_inode(self.inode)
+ return True
+
+ at dataclass
+class InvalidateEntry:
+ inode: int
+ name: str
+ def inode_op(self, inodes, locked_ops):
+ llfuse.invalidate_entry(self.inode, self.name)
+ return True
+
+ at dataclass
+class EvictCandidates:
+ def inode_op(self, inodes, locked_ops):
+ return True
+
+
class Inodes(object):
- """Manage the set of inodes. This is the mapping from a numeric id
- to a concrete File or Directory object"""
+ """Manage the set of inodes.
+
+ This is the mapping from a numeric id to a concrete File or
+ Directory object
- def __init__(self, inode_cache, encoding="utf-8"):
+ """
+
+ def __init__(self, inode_cache, encoding="utf-8", fsns=None, shutdown_started=None):
self._entries = {}
self._counter = itertools.count(llfuse.ROOT_INODE)
self.inode_cache = inode_cache
self.encoding = encoding
- self.deferred_invalidations = []
+ self._fsns = fsns
+ self._shutdown_started = shutdown_started or threading.Event()
+
+ self._inode_remove_queue = queue.Queue()
+ self._inode_remove_thread = threading.Thread(None, self._inode_remove)
+ self._inode_remove_thread.daemon = True
+ self._inode_remove_thread.start()
+
+ self.cap_cache_event = threading.Event()
+ self._by_uuid = collections.defaultdict(list)
def __getitem__(self, item):
return self._entries[item]
@@ -266,50 +317,196 @@ class Inodes(object):
return iter(self._entries.keys())
def items(self):
- return viewitems(self._entries.items())
+ return self._entries.items()
def __contains__(self, k):
return k in self._entries
def touch(self, entry):
+ """Update the access time, adjust the cache position, and
+ notify the _inode_remove thread to recheck the cache.
+
+ """
+
entry._atime = time.time()
- self.inode_cache.touch(entry)
+ if self.inode_cache.touch(entry):
+ self.cap_cache()
+
+ def cap_cache(self):
+ """Notify the _inode_remove thread to recheck the cache."""
+ if not self.cap_cache_event.is_set():
+ self.cap_cache_event.set()
+ self._inode_remove_queue.put(EvictCandidates())
+
+ def update_uuid(self, entry):
+ """Update the Arvados uuid associated with an inode entry.
+
+ This is used to look up inodes that need to be invalidated
+ when a websocket event indicates the object has changed on the
+ API server.
+
+ """
+ if entry.cache_uuid and entry in self._by_uuid[entry.cache_uuid]:
+ self._by_uuid[entry.cache_uuid].remove(entry)
+
+ entry.cache_uuid = entry.uuid()
+ if entry.cache_uuid and entry not in self._by_uuid[entry.cache_uuid]:
+ self._by_uuid[entry.cache_uuid].append(entry)
+
+ if not self._by_uuid[entry.cache_uuid]:
+ del self._by_uuid[entry.cache_uuid]
def add_entry(self, entry):
+ """Assign a numeric inode to a new entry."""
+
entry.inode = next(self._counter)
if entry.inode == llfuse.ROOT_INODE:
entry.inc_ref()
self._entries[entry.inode] = entry
- self.inode_cache.manage(entry)
+
+ self.update_uuid(entry)
+ self.inode_cache.update_cache_size(entry)
+ self.cap_cache()
return entry
def del_entry(self, entry):
- if entry.ref_count == 0:
- self.inode_cache.unmanage(entry)
- del self._entries[entry.inode]
+ """Remove entry from the inode table.
+
+ Indicate this inode entry is pending deletion by setting
+ parent_inode to None. Notify the _inode_remove thread to try
+ and remove it.
+
+ """
+
+ entry.parent_inode = None
+ self._inode_remove_queue.put(RemoveInode(entry))
+ _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
+
+ def _inode_remove(self):
+ """Background thread to handle tasks related to invalidating
+ inodes in the kernel, and removing objects from the inodes
+ table entirely.
+
+ """
+
+ locked_ops = collections.deque()
+ while True:
+ blocking_get = True
+ while True:
+ try:
+ qentry = self._inode_remove_queue.get(blocking_get)
+ except queue.Empty:
+ break
+ blocking_get = False
+ if qentry is None:
+ return
+
+ if self._shutdown_started.is_set():
+ continue
+
+ # Process this entry
+ if qentry.inode_op(self, locked_ops):
+ self._inode_remove_queue.task_done()
+
+ # Give up the reference
+ qentry = None
+
+ with llfuse.lock:
+ while locked_ops:
+ if locked_ops.popleft().inode_op(self, None):
+ self._inode_remove_queue.task_done()
+ self.cap_cache_event.clear()
+ for entry in self.inode_cache.evict_candidates():
+ self._remove(entry)
+
+ def wait_remove_queue_empty(self):
+ # used by tests
+ self._inode_remove_queue.join()
+
+ def _remove(self, entry):
+ """Remove an inode entry if possible.
+
+ If the entry is still referenced or in use, don't do anything.
+ If this is not referenced but the parent is still referenced,
+ clear any data held by the object (which may include directory
+ entries under the object) but don't remove it from the inode
+ table.
+
+ """
+ try:
+ if entry.inode is None:
+ # Removed already
+ return
+
+ if entry.inode == llfuse.ROOT_INODE:
+ return
+
+ if entry.in_use():
+ # referenced internally, stay pinned
+ #_logger.debug("InodeCache cannot clear inode %i, in use", entry.inode)
+ return
+
+ # Tell the kernel it should forget about it
+ entry.kernel_invalidate()
+
+ if entry.has_ref():
+ # has kernel reference, could still be accessed.
+ # when the kernel forgets about it, we can delete it.
+ #_logger.debug("InodeCache cannot clear inode %i, is referenced", entry.inode)
+ return
+
+ # commit any pending changes
with llfuse.lock_released:
entry.finalize()
- entry.inode = None
- else:
- entry.dead = True
- _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
+
+ # Clear the contents
+ entry.clear()
+
+ if entry.parent_inode is None:
+ _logger.debug("InodeCache forgetting inode %i, object cache_size %i, cache total %i, forget_inode True, inode entries %i, type %s",
+ entry.inode, entry.cache_size, self.inode_cache.total(),
+ len(self._entries), type(entry))
+
+ if entry.cache_uuid:
+ self._by_uuid[entry.cache_uuid].remove(entry)
+ if not self._by_uuid[entry.cache_uuid]:
+ del self._by_uuid[entry.cache_uuid]
+ entry.cache_uuid = None
+
+ self.inode_cache.unmanage(entry)
+
+ del self._entries[entry.inode]
+ entry.inode = None
+
+ except Exception as e:
+ _logger.exception("failed remove")
def invalidate_inode(self, entry):
- if entry.has_ref(False):
+ if entry.has_ref():
# Only necessary if the kernel has previously done a lookup on this
# inode and hasn't yet forgotten about it.
- llfuse.invalidate_inode(entry.inode)
+ self._inode_remove_queue.put(InvalidateInode(entry.inode))
def invalidate_entry(self, entry, name):
- if entry.has_ref(False):
+ if entry.has_ref():
# Only necessary if the kernel has previously done a lookup on this
# inode and hasn't yet forgotten about it.
- llfuse.invalidate_entry(entry.inode, native(name.encode(self.encoding)))
+ self._inode_remove_queue.put(InvalidateEntry(entry.inode, name.encode(self.encoding)))
+
+ def begin_shutdown(self):
+ self._inode_remove_queue.put(None)
+ if self._inode_remove_thread is not None:
+ self._inode_remove_thread.join()
+ self._inode_remove_thread = None
def clear(self):
+ with llfuse.lock_released:
+ self.begin_shutdown()
+
self.inode_cache.clear()
+ self._by_uuid.clear()
- for k,v in viewitems(self._entries):
+ for k,v in self._entries.items():
try:
v.finalize()
except Exception as e:
@@ -317,6 +514,14 @@ class Inodes(object):
self._entries.clear()
+ def forward_slash_subst(self):
+ return self._fsns
+
+ def find_by_uuid(self, uuid):
+ """Return a list of zero or more inode entries corresponding
+ to this Arvados UUID."""
+ return self._by_uuid.get(uuid, [])
+
def catch_exceptions(orig_func):
"""Catch uncaught exceptions and log them consistently."""
@@ -377,14 +582,32 @@ class Operations(llfuse.Operations):
rename_time = fuse_time.labels(op='rename')
flush_time = fuse_time.labels(op='flush')
- def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
+ def __init__(self, uid, gid, api_client, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False, fsns=None):
super(Operations, self).__init__()
self._api_client = api_client
if not inode_cache:
inode_cache = InodeCache(cap=256*1024*1024)
- self.inodes = Inodes(inode_cache, encoding=encoding)
+
+ if fsns is None:
+ try:
+ fsns = self._api_client.config()["Collections"]["ForwardSlashNameSubstitution"]
+ except KeyError:
+ # old API server with no FSNS config
+ fsns = '_'
+ else:
+ if fsns == '' or fsns == '/':
+ fsns = None
+
+ # If we get overlapping shutdown events (e.g., fusermount -u
+ # -z and operations.destroy()) llfuse calls forget() on inodes
+ # that have already been deleted. To avoid this, we make
+ # forget() a no-op if called after destroy().
+ self._shutdown_started = threading.Event()
+
+ self.inodes = Inodes(inode_cache, encoding=encoding, fsns=fsns,
+ shutdown_started=self._shutdown_started)
self.uid = uid
self.gid = gid
self.enable_write = enable_write
@@ -397,12 +620,6 @@ class Operations(llfuse.Operations):
# is fully initialized should wait() on this event object.
self.initlock = threading.Event()
- # If we get overlapping shutdown events (e.g., fusermount -u
- # -z and operations.destroy()) llfuse calls forget() on inodes
- # that have already been deleted. To avoid this, we make
- # forget() a no-op if called after destroy().
- self._shutdown_started = threading.Event()
-
self.num_retries = num_retries
self.read_counter = arvados.keep.Counter()
@@ -438,23 +655,26 @@ class Operations(llfuse.Operations):
def metric_count_func(self, opname):
return lambda: int(self.metric_value(opname, "arvmount_fuse_operations_seconds_count"))
+ def begin_shutdown(self):
+ self._shutdown_started.set()
+ self.inodes.begin_shutdown()
+
@destroy_time.time()
@catch_exceptions
def destroy(self):
- self._shutdown_started.set()
+ _logger.debug("arv-mount destroy: start")
+
+ with llfuse.lock_released:
+ self.begin_shutdown()
+
if self.events:
self.events.close()
self.events = None
- # Different versions of llfuse require and forbid us to
- # acquire the lock here. See #8345#note-37, #10805#note-9.
- if LLFUSE_VERSION_0 and llfuse.lock.acquire():
- # llfuse < 0.42
- self.inodes.clear()
- llfuse.lock.release()
- else:
- # llfuse >= 0.42
- self.inodes.clear()
+ self.inodes.clear()
+
+ _logger.debug("arv-mount destroy: complete")
+
def access(self, inode, mode, ctx):
return True
@@ -475,28 +695,34 @@ class Operations(llfuse.Operations):
old_attrs = properties.get("old_attributes") or {}
new_attrs = properties.get("new_attributes") or {}
- for item in self.inodes.inode_cache.find_by_uuid(ev["object_uuid"]):
+ for item in self.inodes.find_by_uuid(ev["object_uuid"]):
item.invalidate()
oldowner = old_attrs.get("owner_uuid")
newowner = ev.get("object_owner_uuid")
for parent in (
- self.inodes.inode_cache.find_by_uuid(oldowner) +
- self.inodes.inode_cache.find_by_uuid(newowner)):
+ self.inodes.find_by_uuid(oldowner) +
+ self.inodes.find_by_uuid(newowner)):
parent.invalidate()
@getattr_time.time()
@catch_exceptions
def getattr(self, inode, ctx=None):
if inode not in self.inodes:
+ _logger.debug("arv-mount getattr: inode %i missing", inode)
raise llfuse.FUSEError(errno.ENOENT)
e = self.inodes[inode]
+ self.inodes.touch(e)
+ parent = None
+ if e.parent_inode:
+ parent = self.inodes[e.parent_inode]
+ self.inodes.touch(parent)
entry = llfuse.EntryAttributes()
entry.st_ino = inode
entry.generation = 0
- entry.entry_timeout = 0
+ entry.entry_timeout = parent.time_to_next_poll() if parent is not None else 0
entry.attr_timeout = e.time_to_next_poll() if e.allow_attr_cache else 0
entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
@@ -564,18 +790,23 @@ class Operations(llfuse.Operations):
if name == '.':
inode = parent_inode
- else:
- if parent_inode in self.inodes:
- p = self.inodes[parent_inode]
- self.inodes.touch(p)
- if name == '..':
- inode = p.parent_inode
- elif isinstance(p, Directory) and name in p:
- inode = p[name].inode
+ elif parent_inode in self.inodes:
+ p = self.inodes[parent_inode]
+ self.inodes.touch(p)
+ if name == '..':
+ inode = p.parent_inode
+ elif isinstance(p, Directory) and name in p:
+ if p[name].inode is None:
+ _logger.debug("arv-mount lookup: parent_inode %i name '%s' found but inode was None",
+ parent_inode, name)
+ raise llfuse.FUSEError(errno.ENOENT)
+
+ inode = p[name].inode
if inode != None:
_logger.debug("arv-mount lookup: parent_inode %i name '%s' inode %i",
parent_inode, name, inode)
+ self.inodes.touch(self.inodes[inode])
self.inodes[inode].inc_ref()
return self.getattr(inode)
else:
@@ -591,7 +822,7 @@ class Operations(llfuse.Operations):
for inode, nlookup in inodes:
ent = self.inodes[inode]
_logger.debug("arv-mount forget: inode %i nlookup %i ref_count %i", inode, nlookup, ent.ref_count)
- if ent.dec_ref(nlookup) == 0 and ent.dead:
+ if ent.dec_ref(nlookup) == 0 and ent.parent_inode is None:
self.inodes.del_entry(ent)
@open_time.time()
@@ -600,6 +831,7 @@ class Operations(llfuse.Operations):
if inode in self.inodes:
p = self.inodes[inode]
else:
+ _logger.debug("arv-mount open: inode %i missing", inode)
raise llfuse.FUSEError(errno.ENOENT)
if isinstance(p, Directory):
@@ -681,7 +913,7 @@ class Operations(llfuse.Operations):
finally:
self._filehandles[fh].release()
del self._filehandles[fh]
- self.inodes.inode_cache.cap_cache()
+ self.inodes.cap_cache()
def releasedir(self, fh):
self.release(fh)
@@ -694,6 +926,7 @@ class Operations(llfuse.Operations):
if inode in self.inodes:
p = self.inodes[inode]
else:
+ _logger.debug("arv-mount opendir: called with unknown or removed inode %i", inode)
raise llfuse.FUSEError(errno.ENOENT)
if not isinstance(p, Directory):
@@ -703,11 +936,16 @@ class Operations(llfuse.Operations):
if p.parent_inode in self.inodes:
parent = self.inodes[p.parent_inode]
else:
+ _logger.warning("arv-mount opendir: parent inode %i of %i is missing", p.parent_inode, inode)
raise llfuse.FUSEError(errno.EIO)
+ _logger.debug("arv-mount opendir: inode %i fh %i ", inode, fh)
+
# update atime
+ p.inc_use()
+ self._filehandles[fh] = DirectoryHandle(fh, p, [('.', p), ('..', parent)] + p.items())
+ p.dec_use()
self.inodes.touch(p)
- self._filehandles[fh] = DirectoryHandle(fh, p, [('.', p), ('..', parent)] + listitems(p))
return fh
@readdir_time.time()
@@ -722,8 +960,9 @@ class Operations(llfuse.Operations):
e = off
while e < len(handle.entries):
- if handle.entries[e][1].inode in self.inodes:
- yield (handle.entries[e][0].encode(self.inodes.encoding), self.getattr(handle.entries[e][1].inode), e+1)
+ ent = handle.entries[e]
+ if ent[1].inode in self.inodes:
+ yield (ent[0].encode(self.inodes.encoding), self.getattr(ent[1].inode), e+1)
e += 1
@statfs_time.time()
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 719ec7ee95..45847fde81 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -349,7 +349,15 @@ Filesystem character encoding
metavar='CLASSES',
help="Comma-separated list of storage classes to request for new collections",
)
-
+ # This is a hidden argument used by tests. Normally this
+ # value will be extracted from the cluster config, but mocking
+ # the cluster config under the presence of multiple threads
+ # and processes turned out to be too complicated and brittle.
+ plumbing.add_argument(
+ '--fsns',
+ type=str,
+ default=None,
+ help=argparse.SUPPRESS)
class Mount(object):
def __init__(self, args, logger=logging.getLogger('arvados.arv-mount')):
@@ -514,7 +522,8 @@ class Mount(object):
api_client=self.api,
encoding=self.args.encoding,
inode_cache=InodeCache(cap=self.args.directory_cache),
- enable_write=self.args.enable_write)
+ enable_write=self.args.enable_write,
+ fsns=self.args.fsns)
if self.args.crunchstat_interval:
statsthread = threading.Thread(
@@ -603,7 +612,6 @@ class Mount(object):
e = self.operations.inodes.add_entry(Directory(
llfuse.ROOT_INODE,
self.operations.inodes,
- self.api.config,
self.args.enable_write,
self.args.filters,
))
@@ -688,8 +696,9 @@ From here, the following directories are available:
def _llfuse_main(self):
try:
- llfuse.main()
+ llfuse.main(workers=10)
except:
llfuse.close(unmount=False)
raise
+ self.operations.begin_shutdown()
llfuse.close()
diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py
index 53214ee94d..508ee7fb73 100644
--- a/services/fuse/arvados_fuse/fresh.py
+++ b/services/fuse/arvados_fuse/fresh.py
@@ -62,7 +62,7 @@ class FreshBase(object):
"""
__slots__ = ("_stale", "_poll", "_last_update", "_atime", "_poll_time", "use_count",
- "ref_count", "dead", "cache_size", "cache_uuid", "allow_attr_cache")
+ "ref_count", "cache_size", "cache_uuid", "allow_attr_cache")
def __init__(self):
self._stale = True
@@ -72,7 +72,6 @@ class FreshBase(object):
self._poll_time = 60
self.use_count = 0
self.ref_count = 0
- self.dead = False
self.cache_size = 0
self.cache_uuid = None
@@ -125,17 +124,11 @@ class FreshBase(object):
self.ref_count -= n
return self.ref_count
- def has_ref(self, only_children):
+ def has_ref(self):
"""Determine if there are any kernel references to this
- object or its children.
-
- If only_children is True, ignore refcount of self and only consider
- children.
+ object.
"""
- if only_children:
- return False
- else:
- return self.ref_count > 0
+ return self.ref_count > 0
def objsize(self):
return 0
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index e3b8dd4c2c..9c78805107 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -36,7 +36,9 @@ class Directory(FreshBase):
and the value referencing a File or Directory object.
"""
- def __init__(self, parent_inode, inodes, apiconfig, enable_write, filters):
+ __slots__ = ("inode", "parent_inode", "inodes", "_entries", "_mtime", "_enable_write", "_filters")
+
+ def __init__(self, parent_inode, inodes, enable_write, filters):
"""parent_inode is the integer inode number"""
super(Directory, self).__init__()
@@ -46,7 +48,6 @@ class Directory(FreshBase):
raise Exception("parent_inode should be an int")
self.parent_inode = parent_inode
self.inodes = inodes
- self.apiconfig = apiconfig
self._entries = {}
self._mtime = time.time()
self._enable_write = enable_write
@@ -64,23 +65,9 @@ class Directory(FreshBase):
else:
yield [f_name, *f[1:]]
- def forward_slash_subst(self):
- if not hasattr(self, '_fsns'):
- self._fsns = None
- config = self.apiconfig()
- try:
- self._fsns = config["Collections"]["ForwardSlashNameSubstitution"]
- except KeyError:
- # old API server with no FSNS config
- self._fsns = '_'
- else:
- if self._fsns == '' or self._fsns == '/':
- self._fsns = None
- return self._fsns
-
def unsanitize_filename(self, incoming):
"""Replace ForwardSlashNameSubstitution value with /"""
- fsns = self.forward_slash_subst()
+ fsns = self.inodes.forward_slash_subst()
if isinstance(fsns, str):
return incoming.replace(fsns, '/')
else:
@@ -99,7 +86,7 @@ class Directory(FreshBase):
elif dirty == '..':
return '__'
else:
- fsns = self.forward_slash_subst()
+ fsns = self.inodes.forward_slash_subst()
if isinstance(fsns, str):
dirty = dirty.replace('/', fsns)
return _disallowed_filename_characters.sub('_', dirty)
@@ -150,6 +137,10 @@ class Directory(FreshBase):
self.inodes.touch(self)
super(Directory, self).fresh()
+ def objsize(self):
+ # Rough estimate of memory footprint based on using pympler
+ return len(self._entries) * 1024
+
def merge(self, items, fn, same, new_entry):
"""Helper method for updating the contents of the directory.
@@ -157,16 +148,17 @@ class Directory(FreshBase):
entries that are the same in both the old and new lists, create new
entries, and delete old entries missing from the new list.
- :items: iterable with new directory contents
+ Arguments:
+ * items: Iterable --- New directory contents
- :fn: function to take an entry in 'items' and return the desired file or
+ * fn: Callable --- Takes an entry in 'items' and return the desired file or
directory name, or None if this entry should be skipped
- :same: function to compare an existing entry (a File or Directory
+ * same: Callable --- Compare an existing entry (a File or Directory
object) with an entry in the items list to determine whether to keep
the existing entry.
- :new_entry: function to create a new directory entry (File or Directory
+ * new_entry: Callable --- Create a new directory entry (File or Directory
object) from an entry in the items list.
"""
@@ -176,29 +168,43 @@ class Directory(FreshBase):
changed = False
for i in items:
name = self.sanitize_filename(fn(i))
- if name:
- if name in oldentries and same(oldentries[name], i):
+ if not name:
+ continue
+ if name in oldentries:
+ ent = oldentries[name]
+ if same(ent, i) and ent.parent_inode == self.inode:
# move existing directory entry over
- self._entries[name] = oldentries[name]
+ self._entries[name] = ent
del oldentries[name]
- else:
- _logger.debug("Adding entry '%s' to inode %i", name, self.inode)
- # create new directory entry
- ent = new_entry(i)
- if ent is not None:
- self._entries[name] = self.inodes.add_entry(ent)
- changed = True
+ self.inodes.inode_cache.touch(ent)
+
+ for i in items:
+ name = self.sanitize_filename(fn(i))
+ if not name:
+ continue
+ if name not in self._entries:
+ # create new directory entry
+ ent = new_entry(i)
+ if ent is not None:
+ self._entries[name] = self.inodes.add_entry(ent)
+ # need to invalidate this just in case there was a
+ # previous entry that couldn't be moved over or a
+ # lookup that returned file not found and cached
+ # a negative result
+ self.inodes.invalidate_entry(self, name)
+ changed = True
+ _logger.debug("Added entry '%s' as inode %i to parent inode %i", name, ent.inode, self.inode)
# delete any other directory entries that were not in found in 'items'
- for i in oldentries:
- _logger.debug("Forgetting about entry '%s' on inode %i", i, self.inode)
- self.inodes.invalidate_entry(self, i)
- self.inodes.del_entry(oldentries[i])
+ for name, ent in oldentries.items():
+ _logger.debug("Detaching entry '%s' from parent_inode %i", name, self.inode)
+ self.inodes.invalidate_entry(self, name)
+ self.inodes.del_entry(ent)
changed = True
if changed:
- self.inodes.invalidate_inode(self)
self._mtime = time.time()
+ self.inodes.inode_cache.update_cache_size(self)
self.fresh()
@@ -210,27 +216,27 @@ class Directory(FreshBase):
return True
return False
- def has_ref(self, only_children):
- if super(Directory, self).has_ref(only_children):
- return True
- for v in self._entries.values():
- if v.has_ref(False):
- return True
- return False
-
def clear(self):
"""Delete all entries"""
+ if not self._entries:
+ return
oldentries = self._entries
self._entries = {}
- for n in oldentries:
- oldentries[n].clear()
- self.inodes.del_entry(oldentries[n])
self.invalidate()
+ for name, ent in oldentries.items():
+ ent.clear()
+ self.inodes.invalidate_entry(self, name)
+ self.inodes.del_entry(ent)
+ self.inodes.inode_cache.update_cache_size(self)
def kernel_invalidate(self):
# Invalidating the dentry on the parent implies invalidating all paths
# below it as well.
- parent = self.inodes[self.parent_inode]
+ if self.parent_inode in self.inodes:
+ parent = self.inodes[self.parent_inode]
+ else:
+ # parent was removed already.
+ return
# Find self on the parent in order to invalidate this path.
# Calling the public items() method might trigger a refresh,
@@ -283,9 +289,10 @@ class CollectionDirectoryBase(Directory):
"""
- def __init__(self, parent_inode, inodes, apiconfig, enable_write, filters, collection, collection_root):
- super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, apiconfig, enable_write, filters)
- self.apiconfig = apiconfig
+ __slots__ = ("collection", "collection_root", "collection_record_file")
+
+ def __init__(self, parent_inode, inodes, enable_write, filters, collection, collection_root):
+ super(CollectionDirectoryBase, self).__init__(parent_inode, inodes, enable_write, filters)
self.collection = collection
self.collection_root = collection_root
self.collection_record_file = None
@@ -293,17 +300,16 @@ class CollectionDirectoryBase(Directory):
def new_entry(self, name, item, mtime):
name = self.sanitize_filename(name)
if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
- if item.fuse_entry.dead is not True:
- raise Exception("Can only reparent dead inode entry")
+ if item.fuse_entry.parent_inode is not None:
+ raise Exception("Can only reparent unparented inode entry")
if item.fuse_entry.inode is None:
raise Exception("Reparented entry must still have valid inode")
- item.fuse_entry.dead = False
+ item.fuse_entry.parent_inode = self.inode
self._entries[name] = item.fuse_entry
elif isinstance(item, arvados.collection.RichCollectionBase):
self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(
self.inode,
self.inodes,
- self.apiconfig,
self._enable_write,
self._filters,
item,
@@ -449,14 +455,23 @@ class CollectionDirectoryBase(Directory):
def clear(self):
super(CollectionDirectoryBase, self).clear()
+ if self.collection is not None:
+ self.collection.unsubscribe()
self.collection = None
+ def objsize(self):
+ # objsize for the whole collection is represented at the root,
+ # don't double-count it
+ return 0
class CollectionDirectory(CollectionDirectoryBase):
"""Represents the root of a directory tree representing a collection."""
+ __slots__ = ("api", "num_retries", "collection_locator",
+ "_manifest_size", "_writable", "_updating_lock")
+
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters=None, collection_record=None, explicit_collection=None):
- super(CollectionDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, filters, None, self)
+ super(CollectionDirectory, self).__init__(parent_inode, inodes, enable_write, filters, None, self)
self.api = api
self.num_retries = num_retries
self._poll = True
@@ -514,7 +529,10 @@ class CollectionDirectory(CollectionDirectoryBase):
if self.collection_record_file is not None:
self.collection_record_file.invalidate()
self.inodes.invalidate_inode(self.collection_record_file)
- _logger.debug("%s invalidated collection record file", self)
+ _logger.debug("parent_inode %s invalidated collection record file inode %s", self.inode,
+ self.collection_record_file.inode)
+ self.inodes.update_uuid(self)
+ self.inodes.inode_cache.update_cache_size(self)
self.fresh()
def uuid(self):
@@ -592,6 +610,7 @@ class CollectionDirectory(CollectionDirectoryBase):
return False
@use_counter
+ @check_update
def collection_record(self):
self.flush()
return self.collection.api_response()
@@ -625,22 +644,32 @@ class CollectionDirectory(CollectionDirectoryBase):
return (self.collection_locator is not None)
def objsize(self):
- # This is an empirically-derived heuristic to estimate the memory used
- # to store this collection's metadata. Calculating the memory
- # footprint directly would be more accurate, but also more complicated.
- return self._manifest_size * 128
+ # This is a rough guess of the amount of overhead involved for
+ # a collection; the assumptions are that that each file
+ # averages 128 bytes in the manifest, but consume 1024 bytes
+ # of Python data structures, so 1024/128=8 means we estimate
+ # the RAM footprint at 8 times the size of bare manifest text.
+ return self._manifest_size * 8
def finalize(self):
- if self.collection is not None:
- if self.writable():
+ if self.collection is None:
+ return
+
+ if self.writable():
+ try:
self.collection.save()
- self.collection.stop_threads()
+ except Exception as e:
+ _logger.exception("Failed to save collection %s", self.collection_locator)
+ self.collection.stop_threads()
def clear(self):
if self.collection is not None:
self.collection.stop_threads()
- super(CollectionDirectory, self).clear()
self._manifest_size = 0
+ super(CollectionDirectory, self).clear()
+ if self.collection_record_file is not None:
+ self.inodes.del_entry(self.collection_record_file)
+ self.collection_record_file = None
class TmpCollectionDirectory(CollectionDirectoryBase):
@@ -667,7 +696,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
# This is always enable_write=True because it never tries to
# save to the backend
super(TmpCollectionDirectory, self).__init__(
- parent_inode, inodes, api_client.config, True, filters, collection, self)
+ parent_inode, inodes, True, filters, collection, self)
self.populate(self.mtime())
def on_event(self, *args, **kwargs):
@@ -689,7 +718,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
with self.collection.lock:
self.collection_record_file.invalidate()
self.inodes.invalidate_inode(self.collection_record_file)
- _logger.debug("%s invalidated collection record", self)
+ _logger.debug("%s invalidated collection record", self.inode)
finally:
while lockcount > 0:
self.collection.lock.acquire()
@@ -764,7 +793,7 @@ and the directory will appear if it exists.
""".lstrip()
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, pdh_only=False, storage_classes=None):
- super(MagicDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, filters)
+ super(MagicDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.pdh_only = pdh_only
@@ -863,7 +892,7 @@ class TagsDirectory(Directory):
"""A special directory that contains as subdirectories all tags visible to the user."""
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, poll_time=60):
- super(TagsDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, filters)
+ super(TagsDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self._poll = True
@@ -943,7 +972,7 @@ class TagDirectory(Directory):
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters, tag,
poll=False, poll_time=60):
- super(TagDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, filters)
+ super(TagDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.tag = tag
@@ -984,9 +1013,13 @@ class TagDirectory(Directory):
class ProjectDirectory(Directory):
"""A special directory that contains the contents of a project."""
+ __slots__ = ("api", "num_retries", "project_object", "project_object_file",
+ "project_uuid", "_updating_lock",
+ "_current_user", "_full_listing", "storage_classes", "recursively_contained")
+
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
project_object, poll=True, poll_time=3, storage_classes=None):
- super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, filters)
+ super(ProjectDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.project_object = project_object
@@ -998,6 +1031,19 @@ class ProjectDirectory(Directory):
self._current_user = None
self._full_listing = False
self.storage_classes = storage_classes
+ self.recursively_contained = False
+
+ # Filter groups can contain themselves, which causes tools
+ # that walk the filesystem to get stuck in an infinite loop,
+ # so suppress returning a listing in that case.
+ if self.project_object.get("group_class") == "filter":
+ iter_parent_inode = parent_inode
+ while iter_parent_inode != llfuse.ROOT_INODE:
+ parent_dir = self.inodes[iter_parent_inode]
+ if isinstance(parent_dir, ProjectDirectory) and parent_dir.project_uuid == self.project_uuid:
+ self.recursively_contained = True
+ break
+ iter_parent_inode = parent_dir.parent_inode
def want_event_subscribe(self):
return True
@@ -1048,7 +1094,7 @@ class ProjectDirectory(Directory):
self.project_object_file = ObjectFile(self.inode, self.project_object)
self.inodes.add_entry(self.project_object_file)
- if not self._full_listing:
+ if self.recursively_contained or not self._full_listing:
return True
def samefn(a, i):
@@ -1092,7 +1138,6 @@ class ProjectDirectory(Directory):
*self._filters_for('collections', qualified=True),
],
) if obj['current_version_uuid'] == obj['uuid'])
-
# end with llfuse.lock_released, re-acquire lock
self.merge(contents,
@@ -1175,6 +1220,12 @@ class ProjectDirectory(Directory):
def persisted(self):
return True
+ def clear(self):
+ super(ProjectDirectory, self).clear()
+ if self.project_object_file is not None:
+ self.inodes.del_entry(self.project_object_file)
+ self.project_object_file = None
+
@use_counter
@check_update
def mkdir(self, name):
@@ -1294,7 +1345,7 @@ class SharedDirectory(Directory):
def __init__(self, parent_inode, inodes, api, num_retries, enable_write, filters,
exclude, poll=False, poll_time=60, storage_classes=None):
- super(SharedDirectory, self).__init__(parent_inode, inodes, api.config, enable_write, filters)
+ super(SharedDirectory, self).__init__(parent_inode, inodes, enable_write, filters)
self.api = api
self.num_retries = num_retries
self.current_user = api.users().current().execute(num_retries=num_retries)
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index 45d3db16fe..9279f7d99d 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -80,9 +80,17 @@ class FuseArvadosFile(File):
if self.writable():
self.arvfile.parent.root_collection().save()
+ def clear(self):
+ if self.parent_inode is None:
+ self.arvfile.fuse_entry = None
+ self.arvfile = None
+
class StringFile(File):
"""Wrap a simple string as a file"""
+
+ __slots__ = ("contents",)
+
def __init__(self, parent_inode, contents, _mtime):
super(StringFile, self).__init__(parent_inode, _mtime)
self.contents = contents
@@ -97,6 +105,8 @@ class StringFile(File):
class ObjectFile(StringFile):
"""Wrap a dict as a serialized json object."""
+ __slots__ = ("object_uuid",)
+
def __init__(self, parent_inode, obj):
super(ObjectFile, self).__init__(parent_inode, "", 0)
self.object_uuid = obj['uuid']
@@ -125,6 +135,9 @@ class FuncToJSONFile(StringFile):
The function is called at the time the file is read. The result is
cached until invalidate() is called.
"""
+
+ __slots__ = ("func",)
+
def __init__(self, parent_inode, func):
super(FuncToJSONFile, self).__init__(parent_inode, "", 0)
self.func = func
diff --git a/services/fuse/setup.py b/services/fuse/setup.py
index d0c46f1320..4e50c24a09 100644
--- a/services/fuse/setup.py
+++ b/services/fuse/setup.py
@@ -44,7 +44,7 @@ setup(name='arvados_fuse',
],
install_requires=[
'arvados-python-client{}'.format(pysdk_dep),
- 'llfuse >= 1.3.6',
+ 'arvados-llfuse >= 1.5.1',
'future',
'python-daemon',
'ciso8601 >= 2.0.0',
diff --git a/services/fuse/tests/integration_test.py b/services/fuse/tests/integration_test.py
index 89b39dbc87..e80b6983a1 100644
--- a/services/fuse/tests/integration_test.py
+++ b/services/fuse/tests/integration_test.py
@@ -86,7 +86,7 @@ class IntegrationTest(unittest.TestCase):
with arvados_fuse.command.Mount(
arvados_fuse.command.ArgumentParser().parse_args(
argv + ['--foreground',
- '--unmount-timeout=2',
+ '--unmount-timeout=60',
self.mnt])) as self.mount:
return func(self, *args, **kwargs)
finally:
diff --git a/services/fuse/tests/mount_test_base.py b/services/fuse/tests/mount_test_base.py
index 8a3522e0cb..02f4009724 100644
--- a/services/fuse/tests/mount_test_base.py
+++ b/services/fuse/tests/mount_test_base.py
@@ -102,10 +102,10 @@ class MountTestBase(unittest.TestCase):
self.operations.events.close(timeout=10)
subprocess.call(["fusermount", "-u", "-z", self.mounttmp])
t0 = time.time()
- self.llfuse_thread.join(timeout=10)
+ self.llfuse_thread.join(timeout=60)
if self.llfuse_thread.is_alive():
logger.warning("MountTestBase.tearDown():"
- " llfuse thread still alive 10s after umount"
+ " llfuse thread still alive 60s after umount"
" -- exiting with SIGKILL")
os.kill(os.getpid(), signal.SIGKILL)
waited = time.time() - t0
diff --git a/services/fuse/tests/test_inodes.py b/services/fuse/tests/test_inodes.py
index 07e6036d08..c5c92a9b3f 100644
--- a/services/fuse/tests/test_inodes.py
+++ b/services/fuse/tests/test_inodes.py
@@ -9,9 +9,14 @@ import llfuse
import logging
class InodeTests(unittest.TestCase):
+
+ # The following tests call next(inodes._counter) because inode 1
+ # (the root directory) gets special treatment.
+
def test_inodes_basic(self):
cache = arvados_fuse.InodeCache(1000, 4)
inodes = arvados_fuse.Inodes(cache)
+ next(inodes._counter)
# Check that ent1 gets added to inodes
ent1 = mock.MagicMock()
@@ -27,6 +32,7 @@ class InodeTests(unittest.TestCase):
def test_inodes_not_persisted(self):
cache = arvados_fuse.InodeCache(1000, 4)
inodes = arvados_fuse.Inodes(cache)
+ next(inodes._counter)
ent1 = mock.MagicMock()
ent1.in_use.return_value = False
@@ -48,6 +54,7 @@ class InodeTests(unittest.TestCase):
def test_inode_cleared(self):
cache = arvados_fuse.InodeCache(1000, 4)
inodes = arvados_fuse.Inodes(cache)
+ next(inodes._counter)
# Check that ent1 gets added to inodes
ent1 = mock.MagicMock()
@@ -68,25 +75,31 @@ class InodeTests(unittest.TestCase):
inodes.add_entry(ent3)
# Won't clear anything because min_entries = 4
- self.assertEqual(2, len(cache._entries))
+ self.assertEqual(2, len(cache._cache_entries))
self.assertFalse(ent1.clear.called)
self.assertEqual(1100, cache.total())
# Change min_entries
cache.min_entries = 1
- cache.cap_cache()
+ ent1.parent_inode = None
+ inodes.cap_cache()
+ inodes.wait_remove_queue_empty()
self.assertEqual(600, cache.total())
self.assertTrue(ent1.clear.called)
# Touching ent1 should cause ent3 to get cleared
+ ent3.parent_inode = None
self.assertFalse(ent3.clear.called)
- cache.touch(ent1)
+ inodes.inode_cache.update_cache_size(ent1)
+ inodes.touch(ent1)
+ inodes.wait_remove_queue_empty()
self.assertTrue(ent3.clear.called)
self.assertEqual(500, cache.total())
def test_clear_in_use(self):
cache = arvados_fuse.InodeCache(1000, 4)
inodes = arvados_fuse.Inodes(cache)
+ next(inodes._counter)
ent1 = mock.MagicMock()
ent1.in_use.return_value = True
@@ -109,10 +122,12 @@ class InodeTests(unittest.TestCase):
ent3.clear.called = False
self.assertFalse(ent1.clear.called)
self.assertFalse(ent3.clear.called)
- cache.touch(ent3)
+ inodes.touch(ent3)
+ inodes.wait_remove_queue_empty()
self.assertFalse(ent1.clear.called)
self.assertFalse(ent3.clear.called)
- self.assertFalse(ent3.kernel_invalidate.called)
+ # kernel invalidate gets called anyway
+ self.assertTrue(ent3.kernel_invalidate.called)
self.assertEqual(1100, cache.total())
# ent1 still in use, ent3 doesn't have ref,
@@ -120,14 +135,17 @@ class InodeTests(unittest.TestCase):
ent3.has_ref.return_value = False
ent1.clear.called = False
ent3.clear.called = False
- cache.touch(ent3)
+ ent3.parent_inode = None
+ inodes.touch(ent3)
+ inodes.wait_remove_queue_empty()
self.assertFalse(ent1.clear.called)
self.assertTrue(ent3.clear.called)
self.assertEqual(500, cache.total())
def test_delete(self):
- cache = arvados_fuse.InodeCache(1000, 4)
+ cache = arvados_fuse.InodeCache(1000, 0)
inodes = arvados_fuse.Inodes(cache)
+ next(inodes._counter)
ent1 = mock.MagicMock()
ent1.in_use.return_value = False
@@ -147,6 +165,9 @@ class InodeTests(unittest.TestCase):
ent1.ref_count = 0
with llfuse.lock:
inodes.del_entry(ent1)
+ inodes.wait_remove_queue_empty()
self.assertEqual(0, cache.total())
- cache.touch(ent3)
+
+ inodes.add_entry(ent3)
+ inodes.wait_remove_queue_empty()
self.assertEqual(600, cache.total())
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py
index ef9c25bcf5..b3bec39cc5 100644
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -1127,7 +1127,7 @@ class MagicDirApiError(FuseMagicTest):
class SanitizeFilenameTest(MountTestBase):
def test_sanitize_filename(self):
pdir = fuse.ProjectDirectory(
- 1, {}, self.api, 0, False, None,
+ 1, fuse.Inodes(None), self.api, 0, False, None,
project_object=self.api.users().current().execute(),
)
acceptable = [
@@ -1227,23 +1227,22 @@ class SlashSubstitutionTest(IntegrationTest):
mnt_args = [
'--read-write',
'--mount-home', 'zzz',
+ '--fsns', '[SLASH]'
]
def setUp(self):
super(SlashSubstitutionTest, self).setUp()
+
self.api = arvados.safeapi.ThreadSafeApiCache(
arvados.config.settings(),
- version='v1',
+ version='v1'
)
- self.api.config = lambda: {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
self.testcoll = self.api.collections().create(body={"name": "foo/bar/baz"}).execute()
self.testcolleasy = self.api.collections().create(body={"name": "foo-bar-baz"}).execute()
self.fusename = 'foo[SLASH]bar[SLASH]baz'
@IntegrationTest.mount(argv=mnt_args)
- @mock.patch('arvados.util.get_config_once')
- def test_slash_substitution_before_listing(self, get_config_once):
- get_config_once.return_value = {"Collections": {"ForwardSlashNameSubstitution": "[SLASH]"}}
+ def test_slash_substitution_before_listing(self):
self.pool_test(os.path.join(self.mnt, 'zzz'), self.fusename)
self.checkContents()
@staticmethod
diff --git a/services/fuse/tests/test_unmount.py b/services/fuse/tests/test_unmount.py
index e89571087e..6a19b33454 100644
--- a/services/fuse/tests/test_unmount.py
+++ b/services/fuse/tests/test_unmount.py
@@ -31,11 +31,11 @@ class UnmountTest(IntegrationTest):
self.mnt])
subprocess.check_call(
['./bin/arv-mount', '--subtype', 'test', '--replace',
- '--unmount-timeout', '10',
+ '--unmount-timeout', '60',
self.mnt])
subprocess.check_call(
['./bin/arv-mount', '--subtype', 'test', '--replace',
- '--unmount-timeout', '10',
+ '--unmount-timeout', '60',
self.mnt,
'--exec', 'true'])
for m in subprocess.check_output(['mount']).splitlines():
commit e16cc17af3c60785f12fd6c3ecf01af0be74842b
Author: Brett Smith <brett.smith at curii.com>
Date: Tue Mar 5 11:31:22 2024 -0500
Merge branch '21504-arv-mount-reference'
Refs #21504.
Arvados-DCO-1.1-Signed-off-by: Brett Smith <brett.smith at curii.com>
diff --git a/doc/_config.yml b/doc/_config.yml
index a6b3b28a6d..cda3eaec3f 100644
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -83,28 +83,29 @@ navbar:
- sdk/python/api-client.html.textile.liquid
- sdk/python/cookbook.html.textile.liquid
- sdk/python/python.html.textile.liquid
- - sdk/python/arvados-fuse.html.textile.liquid
- sdk/python/arvados-cwl-runner.html.textile.liquid
- sdk/python/events.html.textile.liquid
- - CLI:
+ - Command line tools (CLI SDK):
- sdk/cli/install.html.textile.liquid
- sdk/cli/index.html.textile.liquid
- sdk/cli/reference.html.textile.liquid
- sdk/cli/subcommands.html.textile.liquid
- - sdk/cli/project-management.html.textile.liquid
+ - FUSE Driver:
+ - sdk/fuse/install.html.textile.liquid
+ - sdk/fuse/options.html.textile.liquid
- Go:
- sdk/go/index.html.textile.liquid
- sdk/go/example.html.textile.liquid
+ - Java:
+ - sdk/java-v2/index.html.textile.liquid
+ - sdk/java-v2/example.html.textile.liquid
+ - sdk/java-v2/javadoc.html.textile.liquid
- R:
- sdk/R/index.html.md
- sdk/R/arvados/index.html.textile.liquid
- Ruby:
- sdk/ruby/index.html.textile.liquid
- sdk/ruby/example.html.textile.liquid
- - Java v2:
- - sdk/java-v2/index.html.textile.liquid
- - sdk/java-v2/example.html.textile.liquid
- - sdk/java-v2/javadoc.html.textile.liquid
api:
- Concepts:
- api/index.html.textile.liquid
diff --git a/doc/install/install-shell-server.html.textile.liquid b/doc/install/install-shell-server.html.textile.liquid
index 57b79d2042..f864f37563 100644
--- a/doc/install/install-shell-server.html.textile.liquid
+++ b/doc/install/install-shell-server.html.textile.liquid
@@ -35,7 +35,7 @@ h2(#dependencies). Install Dependencies and SDKs
# "Install Ruby and Bundler":ruby.html
# "Install the Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html
-# "Install the FUSE driver":{{site.baseurl}}/sdk/python/arvados-fuse.html
+# "Install the FUSE driver":{{site.baseurl}}/sdk/fuse/install.html
# "Install the CLI":{{site.baseurl}}/sdk/cli/install.html
# "Install the R SDK":{{site.baseurl}}/sdk/R/index.html (optional)
# "Install Docker":install-docker.html (optional)
diff --git a/doc/sdk/cli/index.html.textile.liquid b/doc/sdk/cli/index.html.textile.liquid
index 511a41e0b8..ea10c830bc 100644
--- a/doc/sdk/cli/index.html.textile.liquid
+++ b/doc/sdk/cli/index.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: CLI
+navmenu: Command line tools (CLI SDK)
title: "Overview"
...
diff --git a/doc/sdk/cli/install.html.textile.liquid b/doc/sdk/cli/install.html.textile.liquid
index 9657d236ad..e0d50b874b 100644
--- a/doc/sdk/cli/install.html.textile.liquid
+++ b/doc/sdk/cli/install.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: CLI
+navmenu: Command line tools (CLI SDK)
title: "Installation"
...
{% comment %}
diff --git a/doc/sdk/cli/reference.html.textile.liquid b/doc/sdk/cli/reference.html.textile.liquid
index 735ba5ca87..307fecd9a0 100644
--- a/doc/sdk/cli/reference.html.textile.liquid
+++ b/doc/sdk/cli/reference.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: CLI
+navmenu: Command line tools (CLI SDK)
title: "arv reference"
...
{% comment %}
diff --git a/doc/sdk/cli/subcommands.html.textile.liquid b/doc/sdk/cli/subcommands.html.textile.liquid
index 5dda77ab5e..dadb1d56c7 100644
--- a/doc/sdk/cli/subcommands.html.textile.liquid
+++ b/doc/sdk/cli/subcommands.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: CLI
+navmenu: Command line tools (CLI SDK)
title: "arv subcommands"
...
diff --git a/doc/sdk/fuse/install.html.textile.liquid b/doc/sdk/fuse/install.html.textile.liquid
new file mode 100644
index 0000000000..52ffb2bbd1
--- /dev/null
+++ b/doc/sdk/fuse/install.html.textile.liquid
@@ -0,0 +1,42 @@
+---
+layout: default
+navsection: sdk
+navmenu: FUSE Driver
+title: Installing the FUSE Driver
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+The Arvados FUSE driver is a Python utility that allows you to browse Arvados projects and collections in a filesystem, so you can access that data using existing Unix tools.
+
+h2. Installation
+
+If you are logged in to a managed Arvados VM, the @arv-mount@ utility should already be installed.
+
+To use the FUSE driver elsewhere, you can install from a distribution package or pip.
+
+h2. Option 1: Install from distribution packages
+
+First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/packages.html.
+
+{% assign arvados_component = 'python3-arvados-fuse' %}
+
+{% include 'install_packages' %}
+
+h2. Option 2: Install with pip
+
+Run @pip install arvados_fuse@ in an appropriate installation environment, such as a virtualenv.
+
+Note: The FUSE driver depends on the @libcurl@ and @libfuse@ C libraries. To install the module you may need to install development headers from your distribution. On Debian-based distributions you can install them by running:
+
+<notextile>
+<pre><code># <span class="userinput">apt install build-essential python3-dev libcurl4-openssl-dev libfuse-dev libssl-dev</span>
+</code></pre>
+</notextile>
+
+h2. Usage
+
+For an introduction of how to mount and navigate data, refer to the "Access Keep as a GNU/Linux filesystem":{{site.baseurl}}/user/tutorials/tutorial-keep-mount-gnu-linux.html tutorial.
diff --git a/doc/sdk/fuse/options.html.textile.liquid b/doc/sdk/fuse/options.html.textile.liquid
new file mode 100644
index 0000000000..1ebfa242a5
--- /dev/null
+++ b/doc/sdk/fuse/options.html.textile.liquid
@@ -0,0 +1,193 @@
+---
+layout: default
+navsection: sdk
+navmenu: FUSE Driver
+title: arv-mount options
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+This page documents all available @arv-mount@ options with some usage examples.
+
+# "Mount contents":#contents
+# "Mount custom layout and filtering":#layout
+## "@--filters@ usage and limitations":#filters
+# "Mount access and permissions":#access
+# "Mount lifecycle management":#lifecycle
+# "Mount logging and statistics":#logging
+# "Mount local cache setup":#cache
+# "Mount interactions with Arvados and Linux":#plumbing
+# "Examples":#examples
+## "Using @--exec@":#exec
+## "Running arv-mount as a systemd service":#systemd
+
+h2(#contents). Mount contents
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--all@|Mount a subdirectory for each mode: @home@, @shared@, @by_id@, and @by_tag@ (default if no @--mount-*@ options are given)|
+|@--custom@|Mount a subdirectory for each mode specified by a @--mount-*@ option (default if any @--mount-*@ options are given; see "Mount custom layout and filtering":#layout section)|
+|@--collection UUID_OR_PDH@|Mount the specified collection|
+|@--home@|Mount your home project|
+|@--project UUID@|Mount the specified project|
+|@--shared@|Mount a subdirectory for each project shared with you|
+|@--by-id@|Mount a magic directory where collections and projects are accessible through subdirectories named after their UUID or portable data hash|
+|@--by-pdh@|Mount a magic directory where collections are accessible through subdirectories named after their portable data hash|
+|@--by-tag@|Mount a subdirectory for each tag attached to a collection or project|
+
+h2(#layout). Mount custom layout and filtering
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--filters FILTERS@|Filters to apply to all project, shared, and tag directory contents. Pass filters as either a JSON string or a path to a JSON file. The JSON object should be a list of filters in "Arvados API list filter syntax":{{ site.baseurl }}/api/methods.html#filters. See the "example filters":#filters.|
+|@--mount-home PATH@|Make your home project available under the mount at @PATH@|
+|@--mount-shared PATH@|Make projects shared with you available under the mount at @PATH@|
+|@--mount-tmp PATH@|Make a new temporary writable collection available under the mount at @PATH at . This collection is deleted when the mount is unmounted.|
+|@--mount-by-id PATH@|Make a magic directory available under the mount at @PATH@ where collections and projects are accessible through subdirectories named after their UUID or portable data hash|
+|@--mount-by-pdh PATH@|Make a magic directory available under the mount at @PATH@ where collections are accessible through subdirectories named after portable data hash|
+|@--mount-by-tag PATH@|Make a subdirectory for each tag attached to a collection or project available under the mount at @PATH@|
+
+h3(#filters). @--filters@ usage and limitations
+
+Your argument to @--filters@ should be a JSON list of filters in "Arvados API list filter syntax":{{ site.baseurl }}/api/methods.html#filters. If your filter checks any field besides @uuid@, you should prefix it with the @<resource type>.@ Taken together, here's an example that mounts your home directory excluding filter groups, workflow intermediate output collections, and workflow log collections:
+
+<notextile>
+<pre><code>$ arv-mount --home <span class="userinput">--filters '[["groups.group_class", "!=", "filter"], ["collections.properties.type", "not in", ["intermediate", "log"]]]'</span> ...
+</code></pre>
+</notextile>
+
+Because filters can be awkward to write on the command line, you can also write them in a file, and pass that file path to the @--filters@ option. This example does the same filtering:
+
+<notextile>
+<pre><code>$ <span class="userinput">cat >~/arv-mount-filters.json <<EOF
+[
+ [
+ "groups.group_class",
+ "!=",
+ "filter"
+ ],
+ [
+ "collections.properties.type",
+ "not in",
+ [
+ "intermediate",
+ "log"
+ ]
+ ]
+]
+EOF</span>
+$ arv-mount --home <span class="userinput">--filters ~/arv-mount-filters.json</span> ...
+</code></pre>
+</notextile>
+
+The current implementation of @--filters@ has a few limitations. These may be lifted in a future release:
+
+* You can always access any project or collection by UUID or portable data hash under a magic directory. If you access a project this way, your filters _will_ apply to the project contents.
+* Tag directory listings are generated by querying tags alone. Only filters that apply to @links@ will affect these listings.
+
+h2(#access). Mount access and permissions
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--allow-other@|Let other users on this system read mounted data (default false)|
+|@--read-only@|Mounted data cannot be modified from the mount (default)|
+|@--read-write@|Mounted data can be modified from the mount|
+
+h2(#lifecycle). Mount lifecycle management
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--exec ...@|Mount data, run the specified command, then unmount and exit. @--exec@ reads all remaining options as the command to run, so it must be the last option you specify. Either end your command arguments (and other options) with a @--@ argument, or specify @--exec@ after your mount point.|
+|@--foreground@|Run mount process in the foreground instead of daemonizing (default false)|
+|@--subtype SUBTYPE@|Set mounted filesystem type to @fuse.SUBTYPE@ (default is just @fuse@)|
+|@--replace@|If a FUSE mount is already mounted at the given directory, unmount it before mounting the requested data. If @--subtype@ is specified, unmount only if the mount has that subtype. WARNING: This command can affect any kind of FUSE mount, not just arv-mount.|
+|@--unmount@|If a FUSE mount is already mounted at the given directory, unmount it and exit. If @--subtype@ is specified, unmount only if the mount has that subtype. WARNING: This command can affect any kind of FUSE mount, not just arv-mount.|
+|@--unmount-all@|Unmount all FUSE mounts at or below the given directory, then exit. If @--subtype@ is specified, unmount only if the mount has that subtype. WARNING: This command can affect any kind of FUSE mount, not just arv-mount.|
+|@--unmount-timeout SECONDS@|The number of seconds to wait for a clean unmount after an @--exec@ command has exited (default 2.0). After this time, the mount will be forcefully unmounted.|
+
+h2(#logging). Mount logging and statistics
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--crunchstat-interval SECONDS@|Write stats to stderr every N seconds (default disabled)|
+|@--debug@|Log debug information|
+|@--logfile LOGFILE@|Write debug logs and errors to the specified file (default stderr)|
+
+h2(#cache). Mount local cache setup
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--disk-cache@|Cache data on the local filesystem (default)|
+|@--ram-cache@|Cache data in memory|
+|@--disk-cache-dir DIRECTORY@|Filesystem cache location (default @~/.cache/arvados/keep@)|
+|@--directory-cache BYTES@|Size of directory data cache in bytes (default 128 MiB)|
+|@--file-cache BYTES@|Size of file data cache in bytes (default 8 GiB for filesystem cache, 256 MiB for memory cache)|
+
+h2(#plumbing). Mount interactions with Arvados and Linux
+
+table(table table-bordered table-condensed).
+|_. Option(s)|_. Description|
+|@--disable-event-listening@|Don't subscribe to events on the API server to update mount contents|
+|@--encoding ENCODING@|Filesystem character encoding (default 'utf-8'; specify a name from the "Python codec registry":https://docs.python.org/3/library/codecs.html#standard-encodings)|
+|@--retries RETRIES@|Maximum number of times to retry server requests that encounter temporary failures (e.g., server down). Default 10.|
+|@--storage-classes CLASSES@|Comma-separated list of storage classes to request for new collections|
+
+h2(#examples). Examples
+
+h3(#exec). Using @--exec@
+
+There are a couple of details that are important to understand when you use @--exec@:
+
+* @--exec@ reads all remaining options as the command to run, so it must be the last option you specify. Either end your command arguments (and other options) with a @--@ argument, or specify @--exec@ after your mount point.
+* The command you specify runs from the same directory that you started @arv-mount@ from. To access data inside the mount, you will generally need to pass the path to the mount as an argument.
+
+For example, this generates a recursive listing of all the projects and collections under your home project:
+
+<notextile>
+<pre><code>$ <span class="userinput">arv-mount --home --exec find -type d ArvadosHome -- ArvadosHome</span>
+</code></pre>
+</notextile>
+
+The first @ArvadosHome@ is a path argument to @find at . The second is the mount point argument to @arv-mount at .
+
+h3(#systemd). Running arv-mount as a systemd service
+
+If you want to run @arv-mount@ as a long-running service, it's easy to write a systemd service definition for it. We do not publish one because the entire definition tends to be site-specific, but you can start from this template. You must change the @ExecStart@ path. Comments detail other changes you might want to make.
+
+<notextile>
+<pre><code>[Unit]
+Description=Arvados FUSE mount
+Documentation={{ site.baseurl }}/sdk/fuse/options.html
+
+[Service]
+Type=simple
+CacheDirectory=arvados/keep
+CacheDirectoryMode=0700
+
+# This unit makes the mount available as `Arvados` under the runtime directory root.
+# If this is a system service installed under /etc/systemd/system,
+# the mount will be at /run/Arvados.
+# If this is a user service installed under ~/.config/systemd/user,
+# the mount will be at $XDG_RUNTIME_DIR/Arvados.
+# If you want to mount at another location on the filesystem, remove RuntimeDirectory
+# and replace both instances of %t/Arvados with your desired path.
+RuntimeDirectory=Arvados
+# The arv-mount path must be the absolute path where you installed the command.
+# If you installed from a distribution package, make this /usr/bin/arv-mount.
+# If you installed from pip, replace ... with the path to your virtualenv.
+# You can add options to select what gets mounted, access permissions,
+# cache size, log level, etc.
+ExecStart=<span class="userinput">...</span>/bin/arv-mount --foreground --disk-cache-dir %C/arvados/keep %t/Arvados
+ExecStop=/usr/bin/fusermount -u %t/Arvados
+
+# This unit assumes the running user has a ~/.config/arvados/settings.conf
+# with ARVADOS_API_HOST and ARVADOS_API_TOKEN defined.
+# If not, you can write those in a separate file
+# and set its path as EnvironmentFile.
+# Make sure that file is owned and only readable by the running user (mode 0600).
+#EnvironmentFile=...
+</code></pre>
+</notextile>
diff --git a/doc/sdk/index.html.textile.liquid b/doc/sdk/index.html.textile.liquid
index b733d03bfc..9abfa9789f 100644
--- a/doc/sdk/index.html.textile.liquid
+++ b/doc/sdk/index.html.textile.liquid
@@ -9,13 +9,18 @@ Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This section documents language bindings for the "Arvados API":{{site.baseurl}}/api/index.html and Keep that are available for various programming languages. Not all features are available in every SDK. The most complete SDK is the Python SDK. Note that this section only gives a high level overview of each SDK. Consult the "Arvados API":{{site.baseurl}}/api/index.html section for detailed documentation about Arvados API calls available on each resource.
+This section documents client tools and language bindings for the "Arvados API":{{site.baseurl}}/api/index.html and Keep that are available for various programming languages. The most mature, popular packages are:
+
+* "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html (also includes essential command line tools such as @arv-put@ and @arv-get@)
+* "Command line SDK":{{site.baseurl}}/sdk/cli/install.html (includes the @arv@ tool)
+
+Many Arvados Workbench pages provide examples of using the Python SDK and command line tools to access a given resource. Open "API details" from the action menu and open the tab with the example you're interested in.
+
+We provide API bindings for several other languages, but these SDKs may be missing some features or documentation:
-* "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html (also includes essential command line tools such as "arv-put" and "arv-get")
-* "Command line SDK":{{site.baseurl}}/sdk/cli/install.html ("arv")
* "Go SDK":{{site.baseurl}}/sdk/go/index.html
+* "Java SDK":{{site.baseurl}}/sdk/java-v2/index.html
* "R SDK":{{site.baseurl}}/sdk/R/index.html
* "Ruby SDK":{{site.baseurl}}/sdk/ruby/index.html
-* "Java SDK v2":{{site.baseurl}}/sdk/java-v2/index.html
-Many Arvados Workbench pages, under the *Advanced* tab, provide examples of API and SDK use for accessing the current resource .
+Consult the "Arvados API":{{site.baseurl}}/api/index.html section for detailed documentation about Arvados API calls available on each resource.
diff --git a/doc/sdk/java-v2/example.html.textile.liquid b/doc/sdk/java-v2/example.html.textile.liquid
index 8d2fc2f4af..a0841ec432 100644
--- a/doc/sdk/java-v2/example.html.textile.liquid
+++ b/doc/sdk/java-v2/example.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: Java SDK v2
+navmenu: Java
title: Examples
...
{% comment %}
diff --git a/doc/sdk/java-v2/index.html.textile.liquid b/doc/sdk/java-v2/index.html.textile.liquid
index ad9f0e1a9d..aca9c48078 100644
--- a/doc/sdk/java-v2/index.html.textile.liquid
+++ b/doc/sdk/java-v2/index.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: Java SDK v2
+navmenu: Java
title: "Installation"
...
{% comment %}
diff --git a/doc/sdk/java-v2/javadoc.html.textile.liquid b/doc/sdk/java-v2/javadoc.html.textile.liquid
index 872150f625..686cd2440f 100644
--- a/doc/sdk/java-v2/javadoc.html.textile.liquid
+++ b/doc/sdk/java-v2/javadoc.html.textile.liquid
@@ -1,7 +1,7 @@
---
layout: default
navsection: sdk
-navmenu: Java v2
+navmenu: Java
title: "Javadoc Reference"
no_nav_left: true
diff --git a/doc/sdk/python/arvados-fuse.html.textile.liquid b/doc/sdk/python/arvados-fuse.html.textile.liquid
deleted file mode 100644
index 8b71c7d692..0000000000
--- a/doc/sdk/python/arvados-fuse.html.textile.liquid
+++ /dev/null
@@ -1,48 +0,0 @@
----
-layout: default
-navsection: sdk
-navmenu: Python
-title: Arvados FUSE driver
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-The Arvados FUSE driver is a Python utility that allows you to see the Keep service as a normal filesystem, so that data can be accessed using standard tools. This driver requires the Python SDK installed in order to access Arvados services.
-
-h2. Installation
-
-If you are logged in to a managed Arvados VM, the @arv-mount@ utility should already be installed.
-
-To use the FUSE driver elsewhere, you can install from a distribution package, or PyPI.
-
-h2. Option 1: Install from distribution packages
-
-First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/packages.html
-
-{% assign arvados_component = 'python-arvados-fuse' %}
-
-{% include 'install_packages' %}
-
-h2. Option 2: Install with pip
-
-Run @pip install arvados_fuse@ in an appropriate installation environment, such as a virtualenv.
-
-Note:
-
-The FUSE driver uses @pycurl@ which depends on the @libcurl@ C library. To build the module you may have to first install additional packages. On Debian-based distributions you can install them by running:
-
-<notextile>
-<pre><code># <span class="userinput">apt install git build-essential python3-dev libcurl4-openssl-dev libssl-dev</span>
-</code></pre>
-</notextile>
-
-<pre>
-$ apt-get install git build-essential python3-dev libcurl4-openssl-dev libssl-dev python3-llfuse
-</pre>
-
-h3. Usage
-
-Please refer to the "Accessing Keep from GNU/Linux":{{site.baseurl}}/user/tutorials/tutorial-keep-mount-gnu-linux.html tutorial for more information.
diff --git a/doc/user/getting_started/setup-cli.html.textile.liquid b/doc/user/getting_started/setup-cli.html.textile.liquid
index 999f848c13..18f675d04e 100644
--- a/doc/user/getting_started/setup-cli.html.textile.liquid
+++ b/doc/user/getting_started/setup-cli.html.textile.liquid
@@ -35,7 +35,7 @@ Here are the client packages you can install on your system. You can skip any yo
* "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html: This provides an Arvados API client in Python, as well as low-level command line tools.
* "Command-line SDK":{{site.baseurl}}/sdk/cli/install.html: This provides the high-level @arv@ command and user interface to the Arvados API.
-* "FUSE Driver":{{site.baseurl}}/sdk/python/arvados-fuse.html: This provides the @arv-mount@ command and FUSE driver that lets you access Keep using standard Linux filesystem tools.
+* "FUSE Driver":{{site.baseurl}}/sdk/fuse/install.html: This provides the @arv-mount@ command and FUSE driver that lets you access Keep using standard Linux filesystem tools.
* "CWL Runner":{{site.baseurl}}/sdk/python/arvados-cwl-runner.html: This provides the @arvados-cwl-runner@ command to register and run workflows in Crunch.
* "crunchstat-summary":{{site.baseurl}}/user/cwl/crunchstat-summary.html: This tool provides performance reports for Crunch containers.
* "arvados-client":{{site.baseurl}}/user/debugging/container-shell-access.html: This tool provides subcommands for inspecting Crunch containers, both interactively while they're running and after they've finished.
diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index 610da477ca..719ec7ee95 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -28,105 +28,327 @@ class ArgumentParser(argparse.ArgumentParser):
def __init__(self):
super(ArgumentParser, self).__init__(
parents=[arv_cmd.retry_opt],
- description='''Mount Keep data under the local filesystem. Default mode is --home''',
- epilog="""
- Note: When using the --exec feature, you must either specify the
- mountpoint before --exec, or mark the end of your --exec arguments
- with "--".
- """)
- self.add_argument('--version', action='version',
- version=u"%s %s" % (sys.argv[0], __version__),
- help='Print version and exit.')
- self.add_argument('mountpoint', type=str, help="""Mount point.""")
- self.add_argument('--allow-other', action='store_true',
- help="""Let other users read the mount""")
- self.add_argument('--subtype', type=str, metavar='STRING',
- help="""Report mounted filesystem type as "fuse.STRING", instead of just "fuse".""")
-
- mode = self.add_mutually_exclusive_group()
-
- mode.add_argument('--all', action='store_const', const='all', dest='mode',
- help="""Mount a subdirectory for each mode: home, shared, by_tag, by_id (default if no --mount-* arguments are given).""")
- mode.add_argument('--custom', action='store_const', const=None, dest='mode',
- help="""Mount a top level meta-directory with subdirectories as specified by additional --mount-* arguments (default if any --mount-* arguments are given).""")
- mode.add_argument('--home', action='store_const', const='home', dest='mode',
- help="""Mount only the user's home project.""")
- mode.add_argument('--shared', action='store_const', const='shared', dest='mode',
- help="""Mount only list of projects shared with the user.""")
- mode.add_argument('--by-tag', action='store_const', const='by_tag', dest='mode',
- help="""Mount subdirectories listed by tag.""")
- mode.add_argument('--by-id', action='store_const', const='by_id', dest='mode',
- help="""Mount subdirectories listed by portable data hash or uuid.""")
- mode.add_argument('--by-pdh', action='store_const', const='by_pdh', dest='mode',
- help="""Mount subdirectories listed by portable data hash.""")
- mode.add_argument('--project', type=str, metavar='UUID',
- help="""Mount the specified project.""")
- mode.add_argument('--collection', type=str, metavar='UUID_or_PDH',
- help="""Mount only the specified collection.""")
-
- mounts = self.add_argument_group('Custom mount options')
- mounts.add_argument('--mount-by-pdh',
- type=str, metavar='PATH', action='append', default=[],
- help="Mount each readable collection at mountpoint/PATH/P where P is the collection's portable data hash.")
- mounts.add_argument('--mount-by-id',
- type=str, metavar='PATH', action='append', default=[],
- help="Mount each readable collection at mountpoint/PATH/UUID and mountpoint/PATH/PDH where PDH is the collection's portable data hash and UUID is its UUID.")
- mounts.add_argument('--mount-by-tag',
- type=str, metavar='PATH', action='append', default=[],
- help="Mount all collections with tag TAG at mountpoint/PATH/TAG/UUID.")
- mounts.add_argument('--mount-home',
- type=str, metavar='PATH', action='append', default=[],
- help="Mount the current user's home project at mountpoint/PATH.")
- mounts.add_argument('--mount-shared',
- type=str, metavar='PATH', action='append', default=[],
- help="Mount projects shared with the current user at mountpoint/PATH.")
- mounts.add_argument('--mount-tmp',
- type=str, metavar='PATH', action='append', default=[],
- help="Create a new collection, mount it in read/write mode at mountpoint/PATH, and delete it when unmounting.")
-
-
- self.add_argument('--debug', action='store_true', help="""Debug mode""")
- self.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""")
- self.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False)
- self.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)", default="utf-8")
-
- self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 8 GiB for disk-based cache or 256 MiB with RAM-only cache)", default=0)
- self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128 MiB)", default=128*1024*1024)
-
- cachetype = self.add_mutually_exclusive_group()
- cachetype.add_argument('--ram-cache', action='store_false', dest='disk_cache', help="Use in-memory caching only", default=True)
- cachetype.add_argument('--disk-cache', action='store_true', dest='disk_cache', help="Use disk based caching (default)", default=True)
-
- self.add_argument('--disk-cache-dir', type=str, help="Disk cache location (default ~/.cache/arvados/keep)", default=None)
-
- self.add_argument('--disable-event-listening', action='store_true', help="Don't subscribe to events on the API server", dest="disable_event_listening", default=False)
-
- self.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False)
- self.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
- self.add_argument('--storage-classes', type=str, metavar='CLASSES', help="Specify comma separated list of storage classes to be used when saving data of new collections", default=None)
-
- self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
-
- unmount = self.add_mutually_exclusive_group()
- unmount.add_argument('--unmount', action='store_true', default=False,
- help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit. If --subtype is given, unmount only if the mount has the specified subtype. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
- unmount.add_argument('--unmount-all', action='store_true', default=False,
- help="Forcefully unmount every fuse mount at or below the specified path and exit. If --subtype is given, unmount only mounts that have the specified subtype. Exit non-zero if any other types of mounts are found at or below the given path. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
- unmount.add_argument('--replace', action='store_true', default=False,
- help="If a fuse mount is already present at mountpoint, forcefully unmount it before mounting")
- self.add_argument('--unmount-timeout',
- type=float, default=2.0,
- help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted")
+ description="Interact with Arvados data through a local filesystem",
+ )
self.add_argument(
+ '--version',
+ action='version',
+ version=u"%s %s" % (sys.argv[0], __version__),
+ help="Print version and exit",
+ )
+ self.add_argument(
+ 'mountpoint',
+ metavar='MOUNT_DIR',
+ help="Directory path to mount data",
+ )
+
+ mode_group = self.add_argument_group("Mount contents")
+ mode = mode_group.add_mutually_exclusive_group()
+ mode.add_argument(
+ '--all',
+ action='store_const',
+ const='all',
+ dest='mode',
+ help="""
+Mount a subdirectory for each mode: `home`, `shared`, `by_id`, and `by_tag`
+(default if no `--mount-*` options are given)
+""",
+ )
+ mode.add_argument(
+ '--custom',
+ action='store_const',
+ const=None,
+ dest='mode',
+ help="""
+Mount a subdirectory for each mode specified by a `--mount-*` option
+(default if any `--mount-*` options are given;
+see "Mount custom layout and filtering" section)
+""",
+ )
+ mode.add_argument(
+ '--collection',
+ metavar='UUID_OR_PDH',
+ help="Mount the specified collection",
+ )
+ mode.add_argument(
+ '--home',
+ action='store_const',
+ const='home',
+ dest='mode',
+ help="Mount your home project",
+ )
+ mode.add_argument(
+ '--project',
+ metavar='UUID',
+ help="Mount the specified project",
+ )
+ mode.add_argument(
+ '--shared',
+ action='store_const',
+ const='shared',
+ dest='mode',
+ help="Mount a subdirectory for each project shared with you",
+ )
+ mode.add_argument(
+ '--by-id',
+ action='store_const',
+ const='by_id',
+ dest='mode',
+ help="""
+Mount a magic directory where collections and projects are accessible through
+subdirectories named after their UUID or portable data hash
+""",
+ )
+ mode.add_argument(
+ '--by-pdh',
+ action='store_const',
+ const='by_pdh',
+ dest='mode',
+ help="""
+Mount a magic directory where collections are accessible through
+subdirectories named after their portable data hash
+""",
+ )
+ mode.add_argument(
+ '--by-tag',
+ action='store_const',
+ const='by_tag',
+ dest='mode',
+ help="Mount a subdirectory for each tag attached to a collection or project",
+ )
+
+ mounts = self.add_argument_group("Mount custom layout and filtering")
+ mounts.add_argument(
'--filters',
type=arv_cmd.JSONArgument(arv_cmd.validate_filters),
- help="""Filters to apply to all project, shared, and tag directory
-contents. Pass filters as either a JSON string or a path to a JSON file.
+ help="""
+Filters to apply to all project, shared, and tag directory contents.
+Pass filters as either a JSON string or a path to a JSON file.
The JSON object should be a list of filters in Arvados API list filter syntax.
-""")
- self.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
- dest="exec_args", metavar=('command', 'args', '...', '--'),
- help="""Mount, run a command, then unmount and exit""")
+""",
+ )
+ mounts.add_argument(
+ '--mount-home',
+ metavar='PATH',
+ action='append',
+ default=[],
+ help="Make your home project available under the mount at `PATH`",
+ )
+ mounts.add_argument(
+ '--mount-shared',
+ metavar='PATH',
+ action='append',
+ default=[],
+ help="Make projects shared with you available under the mount at `PATH`",
+ )
+ mounts.add_argument(
+ '--mount-tmp',
+ metavar='PATH',
+ action='append',
+ default=[],
+ help="""
+Make a new temporary writable collection available under the mount at `PATH`.
+This collection is deleted when the mount is unmounted.
+""",
+ )
+ mounts.add_argument(
+ '--mount-by-id',
+ metavar='PATH',
+ action='append',
+ default=[],
+ help="""
+Make a magic directory available under the mount at `PATH` where collections and
+projects are accessible through subdirectories named after their UUID or
+portable data hash
+""",
+ )
+ mounts.add_argument(
+ '--mount-by-pdh',
+ metavar='PATH',
+ action='append',
+ default=[],
+ help="""
+Make a magic directory available under the mount at `PATH` where collections
+are accessible through subdirectories named after portable data hash
+""",
+ )
+ mounts.add_argument(
+ '--mount-by-tag',
+ metavar='PATH',
+ action='append',
+ default=[],
+ help="""
+Make a subdirectory for each tag attached to a collection or project available
+under the mount at `PATH`
+""" ,
+ )
+
+ perms = self.add_argument_group("Mount access and permissions")
+ perms.add_argument(
+ '--allow-other',
+ action='store_true',
+ help="Let other users on this system read mounted data (default false)",
+ )
+ perms.add_argument(
+ '--read-only',
+ action='store_false',
+ default=False,
+ dest='enable_write',
+ help="Mounted data cannot be modified from the mount (default)",
+ )
+ perms.add_argument(
+ '--read-write',
+ action='store_true',
+ default=False,
+ dest='enable_write',
+ help="Mounted data can be modified from the mount",
+ )
+
+ lifecycle = self.add_argument_group("Mount lifecycle management")
+ lifecycle.add_argument(
+ '--exec',
+ nargs=argparse.REMAINDER,
+ dest="exec_args",
+ help="""
+Mount data, run the specified command, then unmount and exit.
+`--exec` reads all remaining options as the command to run,
+so it must be the last option you specify.
+Either end your command arguments (and other options) with a `--` argument,
+or specify `--exec` after your mount point.
+""",
+ )
+ lifecycle.add_argument(
+ '--foreground',
+ action='store_true',
+ default=False,
+ help="Run mount process in the foreground instead of daemonizing (default false)",
+ )
+ lifecycle.add_argument(
+ '--subtype',
+ help="Set mounted filesystem type to `fuse.SUBTYPE` (default is just `fuse`)",
+ )
+ unmount = lifecycle.add_mutually_exclusive_group()
+ unmount.add_argument(
+ '--replace',
+ action='store_true',
+ default=False,
+ help="""
+If a FUSE mount is already mounted at the given directory,
+unmount it before mounting the requested data.
+If `--subtype` is specified, unmount only if the mount has that subtype.
+WARNING: This command can affect any kind of FUSE mount, not just arv-mount.
+""",
+ )
+ unmount.add_argument(
+ '--unmount',
+ action='store_true',
+ default=False,
+ help="""
+If a FUSE mount is already mounted at the given directory, unmount it and exit.
+If `--subtype` is specified, unmount only if the mount has that subtype.
+WARNING: This command can affect any kind of FUSE mount, not just arv-mount.
+""",
+ )
+ unmount.add_argument(
+ '--unmount-all',
+ action='store_true',
+ default=False,
+ help="""
+Unmount all FUSE mounts at or below the given directory, then exit.
+If `--subtype` is specified, unmount only if the mount has that subtype.
+WARNING: This command can affect any kind of FUSE mount, not just arv-mount.
+""",
+ )
+ lifecycle.add_argument(
+ '--unmount-timeout',
+ type=float,
+ default=2.0,
+ metavar='SECONDS',
+ help="""
+The number of seconds to wait for a clean unmount after an `--exec` command has
+exited (default %(default).01f).
+After this time, the mount will be forcefully unmounted.
+""",
+ )
+
+ reporting = self.add_argument_group("Mount logging and statistics")
+ reporting.add_argument(
+ '--crunchstat-interval',
+ type=float,
+ default=0.0,
+ metavar='SECONDS',
+ help="Write stats to stderr every N seconds (default disabled)",
+ )
+ reporting.add_argument(
+ '--debug',
+ action='store_true',
+ help="Log debug information",
+ )
+ reporting.add_argument(
+ '--logfile',
+ help="Write debug logs and errors to the specified file (default stderr)",
+ )
+
+ cache = self.add_argument_group("Mount local cache setup")
+ cachetype = cache.add_mutually_exclusive_group()
+ cachetype.add_argument(
+ '--disk-cache',
+ action='store_true',
+ default=True,
+ dest='disk_cache',
+ help="Cache data on the local filesystem (default)",
+ )
+ cachetype.add_argument(
+ '--ram-cache',
+ action='store_false',
+ default=True,
+ dest='disk_cache',
+ help="Cache data in memory",
+ )
+ cache.add_argument(
+ '--disk-cache-dir',
+ metavar="DIRECTORY",
+ help="Filesystem cache location (default `~/.cache/arvados/keep`)",
+ )
+ cache.add_argument(
+ '--directory-cache',
+ type=int,
+ default=128*1024*1024,
+ metavar='BYTES',
+ help="Size of directory data cache in bytes (default 128 MiB)",
+ )
+ cache.add_argument(
+ '--file-cache',
+ type=int,
+ default=0,
+ metavar='BYTES',
+ help="""
+Size of file data cache in bytes
+(default 8 GiB for filesystem cache, 256 MiB for memory cache)
+""",
+ )
+
+ plumbing = self.add_argument_group("Mount interactions with Arvados and Linux")
+ plumbing.add_argument(
+ '--disable-event-listening',
+ action='store_true',
+ dest='disable_event_listening',
+ default=False,
+ help="Don't subscribe to events on the API server to update mount contents",
+ )
+ plumbing.add_argument(
+ '--encoding',
+ default="utf-8",
+ help="""
+Filesystem character encoding
+(default %(default)r; specify a name from the Python codec registry)
+""",
+ )
+ plumbing.add_argument(
+ '--storage-classes',
+ metavar='CLASSES',
+ help="Comma-separated list of storage classes to request for new collections",
+ )
class Mount(object):
commit 35b218008f0fa6b700c50ab33324b7d25fc043a6
Author: Tom Clegg <tom at curii.com>
Date: Tue Apr 2 10:28:26 2024 -0400
Merge branch '21598-local-keepstore-emptytrash'
fixes #21598
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index 0e0d3c43e4..d8295a24ba 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -2258,9 +2258,14 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er
}
// Rather than have an alternate way to tell keepstore how
- // many buffers to use when starting it this way, we just
- // modify the cluster configuration that we feed it on stdin.
- configData.Cluster.API.MaxKeepBlobBuffers = configData.KeepBuffers
+ // many buffers to use, etc., when starting it this way, we
+ // just modify the cluster configuration that we feed it on
+ // stdin.
+ ccfg := *configData.Cluster
+ ccfg.API.MaxKeepBlobBuffers = configData.KeepBuffers
+ ccfg.Collections.BlobTrash = false
+ ccfg.Collections.BlobTrashConcurrency = 0
+ ccfg.Collections.BlobDeleteConcurrency = 0
localaddr := localKeepstoreAddr()
ln, err := net.Listen("tcp", net.JoinHostPort(localaddr, "0"))
@@ -2280,7 +2285,7 @@ func startLocalKeepstore(configData ConfigData, logbuf io.Writer) (*exec.Cmd, er
var confJSON bytes.Buffer
err = json.NewEncoder(&confJSON).Encode(arvados.Config{
Clusters: map[string]arvados.Cluster{
- configData.Cluster.ClusterID: *configData.Cluster,
+ ccfg.ClusterID: ccfg,
},
})
if err != nil {
diff --git a/lib/crunchrun/integration_test.go b/lib/crunchrun/integration_test.go
index d569020824..e24a7b1317 100644
--- a/lib/crunchrun/integration_test.go
+++ b/lib/crunchrun/integration_test.go
@@ -220,6 +220,8 @@ func (s *integrationSuite) TestRunTrivialContainerWithLocalKeepstore(c *C) {
if trial.logConfig == "none" {
c.Check(logExists, Equals, false)
} else {
+ c.Check(log, Matches, `(?ms).*not running trash worker.*`)
+ c.Check(log, Matches, `(?ms).*not running trash emptier.*`)
c.Check(log, trial.matchGetReq, `(?ms).*"reqMethod":"GET".*`)
c.Check(log, trial.matchPutReq, `(?ms).*"reqMethod":"PUT".*,"reqPath":"0e3bcff26d51c895a60ea0d4585e134d".*`)
}
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list