[ARVADOS] updated: 27616fe74103c079a84ac34b2adb83f1952c5772

git at public.curoverse.com git at public.curoverse.com
Mon May 18 14:26:02 EDT 2015


Summary of changes:
 sdk/python/arvados/arvfile.py          |  1 +
 services/fuse/arvados_fuse/__init__.py | 65 +++++++++++++++++++++++++++++++---
 services/fuse/arvados_fuse/fresh.py    | 33 ++++++++++++++++-
 services/fuse/arvados_fuse/fusedir.py  | 62 +++++++++++++++++++++++++++-----
 services/fuse/arvados_fuse/fusefile.py |  1 +
 services/fuse/bin/arv-mount            |  1 +
 6 files changed, 149 insertions(+), 14 deletions(-)

       via  27616fe74103c079a84ac34b2adb83f1952c5772 (commit)
      from  fea4d2ba4ab741daff3fd17d910b72539a50a447 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 27616fe74103c079a84ac34b2adb83f1952c5772
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date:   Mon May 18 14:27:07 2015 -0400

    3198: Add docstring with overview of the architecture.  Implement check_update
    method wrapper and expand use_counter to every method containing a
    llfuse.lock_released block.  Add note about big_writes option.

diff --git a/sdk/python/arvados/arvfile.py b/sdk/python/arvados/arvfile.py
index 792c81f..2d44d6a 100644
--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -866,6 +866,7 @@ class ArvadosFile(object):
                 for s in to_delete:
                    self.parent._my_block_manager().delete_bufferblock(s)
 
+            self.parent.notify(MOD, self.parent, self.name, (self, self))
 
     @must_be_writable
     @synchronized
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index fc81087..913db4c 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -1,6 +1,49 @@
-#
-# FUSE driver for Arvados Keep
-#
+"""FUSE driver for Arvados Keep
+
+Architecture:
+
+There is one `Operations` object per mount point.  It is the entry point for all
+read and write requests from the llfuse module.
+
+The operations object owns an `Inodes` object.  The inodes object stores the
+mapping from numeric inode (used throughout the file system API to uniquely
+identify files) to the Python objects that implement files and directories.
+
+The `Inodes` object owns an `InodeCache` object.  The inode cache records the
+memory footprint of file system objects and when they are last used.  When the
+cache limit is exceeded, the least recently used objects are cleared.
+
+File system objects inherit from `fresh.FreshBase` which manages the object lifecycle.
+
+File objects inherit from `fusefile.File`.  Key methods are `readfrom` and `writeto`
+which implement actual reads and writes.
+
+Directory objects inherit from `fusedir.Directory`.  The directory object wraps
+a Python dict which stores the mapping from filenames to directory entries.
+Directory contents can be accessed through the Python operators such as `[]`
+and `in`.  These methods automatically check if the directory is fresh (up to
+date) or stale (needs update) and will call `update` if necessary before
+returning a result.
+
+The general FUSE operation flow is as follows:
+
+- The request handler is called with either an inode or file handle that is the
+  subject of the operation.
+
+- Look up the inode using the Inodes table or the file handle in the
+  filehandles table to get the file system object.
+
+- For methods that alter files or directories, check that the operation is
+  valid and permitted using _check_writable().
+
+- Call the relevant method on the file system object.
+
+- Return the result.
+
+The FUSE driver supports the Arvados event bus.  When an event is received for
+an object that is live in the inode cache, that object is immediately updated.
+
+"""
 
 import os
 import sys
@@ -66,6 +109,17 @@ class DirectoryHandle(Handle):
 
 
 class InodeCache(object):
+    """Records the memory footprint of objects and when they are last used.
+
+    When the cache limit is exceeded, the least recently used objects are
+    cleared.  Clearing the object means discarding its contents to release
+    memory.  The next time the object is accessed, it must be re-fetched from
+    the server.  Note that the inode cache limit is a soft limit; the cache
+    limit may be exceeded if necessary to load very large objects, and may also
+    be exceeded if open file handles prevent objects from being cleared.
+
+    """
+
     def __init__(self, cap, min_entries=4):
         self._entries = collections.OrderedDict()
         self._by_uuid = {}
@@ -172,7 +226,10 @@ class Inodes(object):
             entry.dead = True
             _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
 
+
 def catch_exceptions(orig_func):
+    """Catch uncaught exceptions and log them consistently."""
+
     @functools.wraps(orig_func)
     def catch_exceptions_wrapper(self, *args, **kwargs):
         try:
@@ -449,7 +506,7 @@ class Operations(llfuse.Operations):
     @catch_exceptions
     def statfs(self):
         st = llfuse.StatvfsData()
-        st.f_bsize = 64 * 1024
+        st.f_bsize = 128 * 1024
         st.f_blocks = 0
         st.f_files = 0
 
diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py
index aeb8f73..6ecf35c 100644
--- a/services/fuse/arvados_fuse/fresh.py
+++ b/services/fuse/arvados_fuse/fresh.py
@@ -22,8 +22,39 @@ def use_counter(orig_func):
             self.dec_use()
     return use_counter_wrapper
 
+def check_update(orig_func):
+    @functools.wraps(orig_func)
+    def check_update_wrapper(self, *args, **kwargs):
+        self.checkupdate()
+        return orig_func(self, *args, **kwargs)
+    return check_update_wrapper
+
 class FreshBase(object):
-    """Base class for maintaining fresh/stale state to determine when to update."""
+    """Base class for maintaining object lifecycle.
+
+    Functions include:
+
+    * Indicate if an object is up to date (stale() == False) or needs to be
+      updated (stale() == True).  Use invalidate() to mark the object as
+      stale.  An object is also automatically stale if it has not been updated
+      in `_poll_time` seconds.
+
+    * Record access time (atime) timestamp
+
+    * Manage internal use count used by the inode cache ("inc_use" and
+      "dec_use").  An object which is in use cannot be cleared by the inode
+      cache.
+
+    * Manage the kernel reference count ("inc_ref" and "dec_ref").  An object
+      which is referenced by the kernel cannot have its inode entry deleted.
+
+    * Record cache footprint, cache priority
+
+    * Record Arvados uuid at the time the object is placed in the cache
+
+    * Clear the object contents (invalidates the object)
+
+    """
     def __init__(self):
         self._stale = True
         self._poll = False
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index 17709df..2fca36e 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -10,7 +10,7 @@ from apiclient import errors as apiclient_errors
 import errno
 
 from fusefile import StringFile, ObjectFile, FuseArvadosFile
-from fresh import FreshBase, convertTime, use_counter
+from fresh import FreshBase, convertTime, use_counter, check_update
 
 import arvados.collection
 from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
@@ -45,9 +45,10 @@ class Directory(FreshBase):
     """
 
     def __init__(self, parent_inode, inodes):
+        """parent_inode is the integer inode number"""
+
         super(Directory, self).__init__()
 
-        """parent_inode is the integer inode number"""
         self.inode = None
         if not isinstance(parent_inode, int):
             raise Exception("parent_inode should be an int")
@@ -78,23 +79,23 @@ class Directory(FreshBase):
                 _logger.warn(e)
 
     @use_counter
+    @check_update
     def __getitem__(self, item):
-        self.checkupdate()
         return self._entries[item]
 
     @use_counter
+    @check_update
     def items(self):
-        self.checkupdate()
         return list(self._entries.items())
 
     @use_counter
+    @check_update
     def __contains__(self, k):
-        self.checkupdate()
         return k in self._entries
 
     @use_counter
+    @check_update
     def __len__(self):
-        self.checkupdate()
         return len(self._entries)
 
     def fresh(self):
@@ -196,7 +197,22 @@ class Directory(FreshBase):
     def rename(self, name_old, name_new, src):
         raise NotImplementedError()
 
+
 class CollectionDirectoryBase(Directory):
+    """Represent an Arvados Collection as a directory.
+
+    This class is used for Subcollections, and is also the base class for
+    CollectionDirectory, which implements collection loading/saving on
+    Collection records.
+
+    Most operations act only on the underlying Arvados `Collection` object.  The
+    `Collection` object signals via a notify callback to
+    `CollectionDirectoryBase.on_event` that an item was added, removed or
+    modified.  FUSE inodes and directory entries are created, deleted or
+    invalidated in response to these events.
+
+    """
+
     def __init__(self, parent_inode, inodes, collection):
         super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
         self.collection = collection
@@ -243,28 +259,39 @@ class CollectionDirectoryBase(Directory):
     def writable(self):
         return self.collection.writable()
 
+    @use_counter
     def flush(self):
         with llfuse.lock_released:
             self.collection.root_collection().save()
 
+    @use_counter
+    @check_update
     def create(self, name):
         with llfuse.lock_released:
             self.collection.open(name, "w").close()
 
+    @use_counter
+    @check_update
     def mkdir(self, name):
         with llfuse.lock_released:
             self.collection.mkdirs(name)
 
+    @use_counter
+    @check_update
     def unlink(self, name):
         with llfuse.lock_released:
             self.collection.remove(name)
         self.flush()
 
+    @use_counter
+    @check_update
     def rmdir(self, name):
         with llfuse.lock_released:
             self.collection.remove(name)
         self.flush()
 
+    @use_counter
+    @check_update
     def rename(self, name_old, name_new, src):
         if not isinstance(src, CollectionDirectoryBase):
             raise llfuse.FUSEError(errno.EPERM)
@@ -289,7 +316,7 @@ class CollectionDirectoryBase(Directory):
 
 
 class CollectionDirectory(CollectionDirectoryBase):
-    """Represents the root of a directory tree holding a collection."""
+    """Represents the root of a directory tree representing a collection."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
         super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
@@ -343,6 +370,7 @@ class CollectionDirectory(CollectionDirectoryBase):
     def uuid(self):
         return self.collection_locator
 
+    @use_counter
     def update(self):
         try:
             if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator):
@@ -403,8 +431,9 @@ class CollectionDirectory(CollectionDirectoryBase):
                 _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
         return False
 
+    @use_counter
+    @check_update
     def __getitem__(self, item):
-        self.checkupdate()
         if item == '.arvados#collection':
             if self.collection_record_file is None:
                 self.collection_record_file = ObjectFile(self.inode, self.collection_record)
@@ -433,6 +462,7 @@ class CollectionDirectory(CollectionDirectoryBase):
         # footprint directly would be more accurate, but also more complicated.
         return self._manifest_size * 128
 
+
 class MagicDirectory(Directory):
     """A special directory that logically contains the set of all extant keep locators.
 
@@ -522,6 +552,7 @@ class TagsDirectory(RecursiveInvalidateDirectory):
         self._poll = True
         self._poll_time = poll_time
 
+    @use_counter
     def update(self):
         with llfuse.lock_released:
             tags = self.api.links().list(
@@ -549,6 +580,7 @@ class TagDirectory(Directory):
         self._poll = poll
         self._poll_time = poll_time
 
+    @use_counter
     def update(self):
         with llfuse.lock_released:
             taggedcollections = self.api.links().list(
@@ -597,6 +629,7 @@ class ProjectDirectory(Directory):
     def uuid(self):
         return self.project_uuid
 
+    @use_counter
     def update(self):
         if self.project_object_file == None:
             self.project_object_file = ObjectFile(self.inode, self.project_object)
@@ -650,8 +683,9 @@ class ProjectDirectory(Directory):
         finally:
             self._updating_lock.release()
 
+    @use_counter
+    @check_update
     def __getitem__(self, item):
-        self.checkupdate()
         if item == '.arvados#project':
             return self.project_object_file
         else:
@@ -663,6 +697,8 @@ class ProjectDirectory(Directory):
         else:
             return super(ProjectDirectory, self).__contains__(k)
 
+    @use_counter
+    @check_update
     def writable(self):
         with llfuse.lock_released:
             if not self._current_user:
@@ -672,6 +708,8 @@ class ProjectDirectory(Directory):
     def persisted(self):
         return True
 
+    @use_counter
+    @check_update
     def mkdir(self, name):
         try:
             with llfuse.lock_released:
@@ -683,6 +721,8 @@ class ProjectDirectory(Directory):
             _logger.error(error)
             raise llfuse.FUSEError(errno.EEXIST)
 
+    @use_counter
+    @check_update
     def rmdir(self, name):
         if name not in self:
             raise llfuse.FUSEError(errno.ENOENT)
@@ -694,6 +734,8 @@ class ProjectDirectory(Directory):
             self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries)
         self.invalidate()
 
+    @use_counter
+    @check_update
     def rename(self, name_old, name_new, src):
         if not isinstance(src, ProjectDirectory):
             raise llfuse.FUSEError(errno.EPERM)
@@ -719,6 +761,7 @@ class ProjectDirectory(Directory):
         self._entries[name_new] = ent
         llfuse.invalidate_entry(src.inode, name_old)
 
+
 class SharedDirectory(Directory):
     """A special directory that represents users or groups who have shared projects with me."""
 
@@ -731,6 +774,7 @@ class SharedDirectory(Directory):
         self._poll = True
         self._poll_time = poll_time
 
+    @use_counter
     def update(self):
         with llfuse.lock_released:
             all_projects = arvados.util.list_all(
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index d09b1f0..d33f9f9 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -37,6 +37,7 @@ class File(FreshBase):
     def flush(self):
         pass
 
+
 class FuseArvadosFile(File):
     """Wraps a ArvadosFile."""
 
diff --git a/services/fuse/bin/arv-mount b/services/fuse/bin/arv-mount
index 49874da..76476da 100755
--- a/services/fuse/bin/arv-mount
+++ b/services/fuse/bin/arv-mount
@@ -156,6 +156,7 @@ From here, the following directories are available:
     opts = [optname for optname in ['allow_other', 'debug']
             if getattr(args, optname)]
 
+    # Increase default read/write size from 4KiB to 128KiB
     opts += ["big_writes", "max_read=131072"]
 
     if args.exec_args:

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list