[ARVADOS] updated: 27616fe74103c079a84ac34b2adb83f1952c5772
git at public.curoverse.com
git at public.curoverse.com
Mon May 18 14:26:02 EDT 2015
Summary of changes:
sdk/python/arvados/arvfile.py | 1 +
services/fuse/arvados_fuse/__init__.py | 65 +++++++++++++++++++++++++++++++---
services/fuse/arvados_fuse/fresh.py | 33 ++++++++++++++++-
services/fuse/arvados_fuse/fusedir.py | 62 +++++++++++++++++++++++++++-----
services/fuse/arvados_fuse/fusefile.py | 1 +
services/fuse/bin/arv-mount | 1 +
6 files changed, 149 insertions(+), 14 deletions(-)
via 27616fe74103c079a84ac34b2adb83f1952c5772 (commit)
from fea4d2ba4ab741daff3fd17d910b72539a50a447 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 27616fe74103c079a84ac34b2adb83f1952c5772
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Mon May 18 14:27:07 2015 -0400
3198: Add docstring with overview of the architecture. Implement check_update
method wrapper and expand use_counter to every method containing a
llfuse.lock_released block. Add note about big_writes option.
diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 792c81f..2d44d6a 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -866,6 +866,7 @@ class ArvadosFile(object):
for s in to_delete:
self.parent._my_block_manager().delete_bufferblock(s)
+ self.parent.notify(MOD, self.parent, self.name, (self, self))
@must_be_writable
@synchronized
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index fc81087..913db4c 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -1,6 +1,49 @@
-#
-# FUSE driver for Arvados Keep
-#
+"""FUSE driver for Arvados Keep
+
+Architecture:
+
+There is one `Operations` object per mount point. It is the entry point for all
+read and write requests from the llfuse module.
+
+The operations object owns an `Inodes` object. The inodes object stores the
+mapping from numeric inode (used throughout the file system API to uniquely
+identify files) to the Python objects that implement files and directories.
+
+The `Inodes` object owns an `InodeCache` object. The inode cache records the
+memory footprint of file system objects and when they are last used. When the
+cache limit is exceeded, the least recently used objects are cleared.
+
+File system objects inherit from `fresh.FreshBase` which manages the object lifecycle.
+
+File objects inherit from `fusefile.File`. Key methods are `readfrom` and `writeto`
+which implement actual reads and writes.
+
+Directory objects inherit from `fusedir.Directory`. The directory object wraps
+a Python dict which stores the mapping from filenames to directory entries.
+Directory contents can be accessed through the Python operators such as `[]`
+and `in`. These methods automatically check if the directory is fresh (up to
+date) or stale (needs update) and will call `update` if necessary before
+returning a result.
+
+The general FUSE operation flow is as follows:
+
+- The request handler is called with either an inode or file handle that is the
+ subject of the operation.
+
+- Look up the inode using the Inodes table or the file handle in the
+ filehandles table to get the file system object.
+
+- For methods that alter files or directories, check that the operation is
+ valid and permitted using _check_writable().
+
+- Call the relevant method on the file system object.
+
+- Return the result.
+
+The FUSE driver supports the Arvados event bus. When an event is received for
+an object that is live in the inode cache, that object is immediately updated.
+
+"""
import os
import sys
@@ -66,6 +109,17 @@ class DirectoryHandle(Handle):
class InodeCache(object):
+ """Records the memory footprint of objects and when they are last used.
+
+ When the cache limit is exceeded, the least recently used objects are
+ cleared. Clearing the object means discarding its contents to release
+ memory. The next time the object is accessed, it must be re-fetched from
+ the server. Note that the inode cache limit is a soft limit; the cache
+ limit may be exceeded if necessary to load very large objects, and it may also
+ be exceeded if open file handles prevent objects from being cleared.
+
+ """
+
def __init__(self, cap, min_entries=4):
self._entries = collections.OrderedDict()
self._by_uuid = {}
@@ -172,7 +226,10 @@ class Inodes(object):
entry.dead = True
_logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
+
def catch_exceptions(orig_func):
+ """Catch uncaught exceptions and log them consistently."""
+
@functools.wraps(orig_func)
def catch_exceptions_wrapper(self, *args, **kwargs):
try:
@@ -449,7 +506,7 @@ class Operations(llfuse.Operations):
@catch_exceptions
def statfs(self):
st = llfuse.StatvfsData()
- st.f_bsize = 64 * 1024
+ st.f_bsize = 128 * 1024
st.f_blocks = 0
st.f_files = 0
diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py
index aeb8f73..6ecf35c 100644
--- a/services/fuse/arvados_fuse/fresh.py
+++ b/services/fuse/arvados_fuse/fresh.py
@@ -22,8 +22,39 @@ def use_counter(orig_func):
self.dec_use()
return use_counter_wrapper
+def check_update(orig_func):
+ @functools.wraps(orig_func)
+ def check_update_wrapper(self, *args, **kwargs):
+ self.checkupdate()
+ return orig_func(self, *args, **kwargs)
+ return check_update_wrapper
+
class FreshBase(object):
- """Base class for maintaining fresh/stale state to determine when to update."""
+ """Base class for maintaining object lifecycle.
+
+ Functions include:
+
+ * Indicate if an object is up to date (stale() == False) or needs to be
+ updated (stale() == True). Use invalidate() to mark the object as
+ stale. An object is also automatically stale if it has not been updated
+ in `_poll_time` seconds.
+
+ * Record access time (atime) timestamp
+
+ * Manage internal use count used by the inode cache ("inc_use" and
+ "dec_use"). An object which is in use cannot be cleared by the inode
+ cache.
+
+ * Manage the kernel reference count ("inc_ref" and "dec_ref"). An object
+ which is referenced by the kernel cannot have its inode entry deleted.
+
+ * Record cache footprint, cache priority
+
+ * Record Arvados uuid at the time the object is placed in the cache
+
+ * Clear the object contents (invalidates the object)
+
+ """
def __init__(self):
self._stale = True
self._poll = False
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index 17709df..2fca36e 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -10,7 +10,7 @@ from apiclient import errors as apiclient_errors
import errno
from fusefile import StringFile, ObjectFile, FuseArvadosFile
-from fresh import FreshBase, convertTime, use_counter
+from fresh import FreshBase, convertTime, use_counter, check_update
import arvados.collection
from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
@@ -45,9 +45,10 @@ class Directory(FreshBase):
"""
def __init__(self, parent_inode, inodes):
+ """parent_inode is the integer inode number"""
+
super(Directory, self).__init__()
- """parent_inode is the integer inode number"""
self.inode = None
if not isinstance(parent_inode, int):
raise Exception("parent_inode should be an int")
@@ -78,23 +79,23 @@ class Directory(FreshBase):
_logger.warn(e)
@use_counter
+ @check_update
def __getitem__(self, item):
- self.checkupdate()
return self._entries[item]
@use_counter
+ @check_update
def items(self):
- self.checkupdate()
return list(self._entries.items())
@use_counter
+ @check_update
def __contains__(self, k):
- self.checkupdate()
return k in self._entries
@use_counter
+ @check_update
def __len__(self):
- self.checkupdate()
return len(self._entries)
def fresh(self):
@@ -196,7 +197,22 @@ class Directory(FreshBase):
def rename(self, name_old, name_new, src):
raise NotImplementedError()
+
class CollectionDirectoryBase(Directory):
+ """Represent an Arvados Collection as a directory.
+
+ This class is used for Subcollections, and is also the base class for
+ CollectionDirectory, which implements collection loading/saving on
+ Collection records.
+
+ Most operations act only on the underlying Arvados `Collection` object. The
+ `Collection` object signals via a notify callback to
+ `CollectionDirectoryBase.on_event` that an item was added, removed or
+ modified. FUSE inodes and directory entries are created, deleted or
+ invalidated in response to these events.
+
+ """
+
def __init__(self, parent_inode, inodes, collection):
super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
self.collection = collection
@@ -243,28 +259,39 @@ class CollectionDirectoryBase(Directory):
def writable(self):
return self.collection.writable()
+ @use_counter
def flush(self):
with llfuse.lock_released:
self.collection.root_collection().save()
+ @use_counter
+ @check_update
def create(self, name):
with llfuse.lock_released:
self.collection.open(name, "w").close()
+ @use_counter
+ @check_update
def mkdir(self, name):
with llfuse.lock_released:
self.collection.mkdirs(name)
+ @use_counter
+ @check_update
def unlink(self, name):
with llfuse.lock_released:
self.collection.remove(name)
self.flush()
+ @use_counter
+ @check_update
def rmdir(self, name):
with llfuse.lock_released:
self.collection.remove(name)
self.flush()
+ @use_counter
+ @check_update
def rename(self, name_old, name_new, src):
if not isinstance(src, CollectionDirectoryBase):
raise llfuse.FUSEError(errno.EPERM)
@@ -289,7 +316,7 @@ class CollectionDirectoryBase(Directory):
class CollectionDirectory(CollectionDirectoryBase):
- """Represents the root of a directory tree holding a collection."""
+ """Represents the root of a directory tree representing a collection."""
def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
@@ -343,6 +370,7 @@ class CollectionDirectory(CollectionDirectoryBase):
def uuid(self):
return self.collection_locator
+ @use_counter
def update(self):
try:
if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator):
@@ -403,8 +431,9 @@ class CollectionDirectory(CollectionDirectoryBase):
_logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
return False
+ @use_counter
+ @check_update
def __getitem__(self, item):
- self.checkupdate()
if item == '.arvados#collection':
if self.collection_record_file is None:
self.collection_record_file = ObjectFile(self.inode, self.collection_record)
@@ -433,6 +462,7 @@ class CollectionDirectory(CollectionDirectoryBase):
# footprint directly would be more accurate, but also more complicated.
return self._manifest_size * 128
+
class MagicDirectory(Directory):
"""A special directory that logically contains the set of all extant keep locators.
@@ -522,6 +552,7 @@ class TagsDirectory(RecursiveInvalidateDirectory):
self._poll = True
self._poll_time = poll_time
+ @use_counter
def update(self):
with llfuse.lock_released:
tags = self.api.links().list(
@@ -549,6 +580,7 @@ class TagDirectory(Directory):
self._poll = poll
self._poll_time = poll_time
+ @use_counter
def update(self):
with llfuse.lock_released:
taggedcollections = self.api.links().list(
@@ -597,6 +629,7 @@ class ProjectDirectory(Directory):
def uuid(self):
return self.project_uuid
+ @use_counter
def update(self):
if self.project_object_file == None:
self.project_object_file = ObjectFile(self.inode, self.project_object)
@@ -650,8 +683,9 @@ class ProjectDirectory(Directory):
finally:
self._updating_lock.release()
+ @use_counter
+ @check_update
def __getitem__(self, item):
- self.checkupdate()
if item == '.arvados#project':
return self.project_object_file
else:
@@ -663,6 +697,8 @@ class ProjectDirectory(Directory):
else:
return super(ProjectDirectory, self).__contains__(k)
+ @use_counter
+ @check_update
def writable(self):
with llfuse.lock_released:
if not self._current_user:
@@ -672,6 +708,8 @@ class ProjectDirectory(Directory):
def persisted(self):
return True
+ @use_counter
+ @check_update
def mkdir(self, name):
try:
with llfuse.lock_released:
@@ -683,6 +721,8 @@ class ProjectDirectory(Directory):
_logger.error(error)
raise llfuse.FUSEError(errno.EEXIST)
+ @use_counter
+ @check_update
def rmdir(self, name):
if name not in self:
raise llfuse.FUSEError(errno.ENOENT)
@@ -694,6 +734,8 @@ class ProjectDirectory(Directory):
self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries)
self.invalidate()
+ @use_counter
+ @check_update
def rename(self, name_old, name_new, src):
if not isinstance(src, ProjectDirectory):
raise llfuse.FUSEError(errno.EPERM)
@@ -719,6 +761,7 @@ class ProjectDirectory(Directory):
self._entries[name_new] = ent
llfuse.invalidate_entry(src.inode, name_old)
+
class SharedDirectory(Directory):
"""A special directory that represents users or groups who have shared projects with me."""
@@ -731,6 +774,7 @@ class SharedDirectory(Directory):
self._poll = True
self._poll_time = poll_time
+ @use_counter
def update(self):
with llfuse.lock_released:
all_projects = arvados.util.list_all(
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index d09b1f0..d33f9f9 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -37,6 +37,7 @@ class File(FreshBase):
def flush(self):
pass
+
class FuseArvadosFile(File):
"""Wraps a ArvadosFile."""
diff --git a/services/fuse/bin/arv-mount b/services/fuse/bin/arv-mount
index 49874da..76476da 100755
--- a/services/fuse/bin/arv-mount
+++ b/services/fuse/bin/arv-mount
@@ -156,6 +156,7 @@ From here, the following directories are available:
opts = [optname for optname in ['allow_other', 'debug']
if getattr(args, optname)]
+ # Increase default read/write size from 4KiB to 128KiB
opts += ["big_writes", "max_read=131072"]
if args.exec_args:
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list