[ARVADOS] updated: a8918e636507200148758302dc6b9a081dc2f206
git at public.curoverse.com
git at public.curoverse.com
Fri May 2 11:09:56 EDT 2014
Summary of changes:
sdk/python/arvados/fuse.py | 150 +++++++++++++++++++++++++++++++++-----------
sdk/python/bin/arv-mount | 16 +++--
sdk/python/test_mount.py | 15 +++--
3 files changed, 132 insertions(+), 49 deletions(-)
via a8918e636507200148758302dc6b9a081dc2f206 (commit)
from de54cdcea8dca015e3b08bb23f7221faa4814ef0 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit a8918e636507200148758302dc6b9a081dc2f206
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Fri May 2 11:09:46 2014 -0400
Refactored directory handling a bit in fuse to better accommodate other virtual
directory types, and added virtual directories for tags (untested). Existing
tests pass, need to write new tests for tag directories.
diff --git a/sdk/python/arvados/fuse.py b/sdk/python/arvados/fuse.py
index 983dc2e..b3723f0 100644
--- a/sdk/python/arvados/fuse.py
+++ b/sdk/python/arvados/fuse.py
@@ -25,32 +25,74 @@ class Directory(object):
self.inode = None
self.parent_inode = parent_inode
self._entries = {}
+ self.stale = True
+
+ # Overriden by subclasses to implement logic to update the entries dict
+ # when the directory is stale
+ def update(self):
+ pass
+
+ # Mark the entries dict as stale
+ def invalidate(self):
+ self.stale = True
+
+ # Only used when computing the size of the disk footprint of the directory
+ # (stub)
+ def size(self):
+ return 0
def __getitem__(self, item):
+ if self.stale:
+ self.update()
return self._entries[item]
- def __setitem__(self, key, item):
- self._entries[key] = item
+ def items(self):
+ if self.stale:
+ self.update()
+ return self._entries.items()
def __iter__(self):
+ if self.stale:
+ self.update()
return self._entries.iterkeys()
- def items(self):
- return self._entries.items()
-
def __contains__(self, k):
+ if self.stale:
+ self.update()
return k in self._entries
- def size(self):
- return 0
+
+class CollectionDirectory(Directory):
+ '''Represents the root of a directory tree holding a collection.'''
+
+ def __init__(self, parent_inode, inodes, collection_locator):
+ super(CollectionDirectory, self).__init__(parent_inode)
+ self.inodes = inodes
+ self.collection_locator = collection_locator
+
+ def update(self):
+ collection = arvados.CollectionReader(arvados.Keep.get(self.collection_locator))
+ for s in collection.all_streams():
+ cwd = self
+ for part in s.name().split('/'):
+ if part != '' and part != '.':
+ if part not in cwd._entries:
+ cwd._entries[part] = self.inodes.add_entry(Directory(cwd.inode))
+ cwd = cwd._entries[part]
+ for k, v in s.files().items():
+ cwd._entries[k] = self.inodes.add_entry(File(cwd.inode, v))
+ self.stale = False
+
class MagicDirectory(Directory):
- '''A special directory that logically contains the set of all extant
- keep locators. When a file is referenced by lookup(), it is tested
- to see if it is a valid keep locator to a manifest, and if so, loads the manifest
- contents as a subdirectory of this directory with the locator as the directory name.
- Since querying a list of all extant keep locators is impractical, only loaded collections
- are visible to readdir().'''
+ '''A special directory that logically contains the set of all extant keep
+ locators. When a file is referenced by lookup(), it is tested to see if it
+ is a valid keep locator to a manifest, and if so, loads the manifest
+ contents as a subdirectory of this directory with the locator as the
+ directory name. Since querying a list of all extant keep locators is
+ impractical, only collections that have already been accessed are visible
+ to readdir().
+ '''
def __init__(self, parent_inode, inodes):
super(MagicDirectory, self).__init__(parent_inode)
@@ -70,11 +112,56 @@ class MagicDirectory(Directory):
def __getitem__(self, item):
if item not in self._entries:
- collection = arvados.CollectionReader(arvados.Keep.get(item))
- self._entries[item] = self.inodes.add_entry(Directory(self.inode))
- self.inodes.load_collection(self._entries[item], collection)
+ self._entries[item] = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, item))
return self._entries[item]
+
+class TagsDirectory(Directory):
+ '''A special directory that contains as subdirectories all tags visible to the user.'''
+
+ def __init__(self, parent_inode, inodes, api):
+ super(TagsDirectory, self).__init__(parent_inode)
+ self.inodes = inodes
+ self.api = api
+
+ def update(self):
+ tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = 'name').execute()
+ oldentries = self._entries
+ self._entries = {}
+ for n in tags['items']:
+ if n in oldentries:
+ self._entries[n] = oldentries[n]
+ else:
+ self._entries[n] = self.inodes.add_entry(TagDirectory(self, inodes, api, n))
+ self.stale = False
+
+
+class TagDirectory(Directory):
+ '''A special directory that contains as subdirectories all collections visible
+ to the user that are tagged with a particular tag.
+ '''
+
+ def __init__(self, parent_inode, inodes, api, tag):
+ super(TagDirectory, self).__init__(parent_inode)
+ self.inodes = inodes
+ self.api = api
+ self.tag = tag
+
+ def update(self):
+ collections = self.api.links().list(filters=[['link_class', '=', 'tag'],
+ ['name', '=', self.tag],
+ ['head_uuid', 'is_a', 'arvados#collection']],
+ select=['head_uuid']).execute()
+ oldentries = self._entries
+ self._entries = {}
+ for c in collections['items']:
+ if n in oldentries:
+ self._entries[n] = oldentries[n]
+ else:
+ self._entries[n] = self.inodes.add_entry(CollectionDirectory(self, inodes, api, n['head_uuid']))
+ self.stale = False
+
+
class File(object):
'''Wraps a StreamFileReader for use by Directory.'''
@@ -86,16 +173,18 @@ class File(object):
def size(self):
return self.reader.size()
+
class FileHandle(object):
- '''Connects a numeric file handle to a File or Directory object that has
+ '''Connects a numeric file handle to a File or Directory object that has
been opened by the client.'''
def __init__(self, fh, entry):
self.fh = fh
self.entry = entry
+
class Inodes(object):
- '''Manage the set of inodes. This is the mapping from a numeric id
+ '''Manage the set of inodes. This is the mapping from a numeric id
to a concrete File or Directory object'''
def __init__(self):
@@ -117,32 +206,19 @@ class Inodes(object):
def __contains__(self, k):
return k in self._entries
- def load_collection(self, parent_dir, collection):
- '''parent_dir is the Directory object that will be populated by the collection.
- collection is the arvados.CollectionReader to use as the source'''
- for s in collection.all_streams():
- cwd = parent_dir
- for part in s.name().split('/'):
- if part != '' and part != '.':
- if part not in cwd:
- cwd[part] = self.add_entry(Directory(cwd.inode))
- cwd = cwd[part]
- for k, v in s.files().items():
- cwd[k] = self.add_entry(File(cwd.inode, v))
-
def add_entry(self, entry):
entry.inode = self._counter
self._entries[entry.inode] = entry
self._counter += 1
- return entry
+ return entry
class Operations(llfuse.Operations):
'''This is the main interface with llfuse. The methods on this object are
- called by llfuse threads to service FUSE events to query and read from
+ called by llfuse threads to service FUSE events to query and read from
the file system.
llfuse has its own global lock which is acquired before calling a request handler,
- so request handlers do not run concurrently unless the lock is explicitly released
+ so request handlers do not run concurrently unless the lock is explicitly released
with llfuse.lock_released.'''
def __init__(self, uid, gid):
@@ -151,7 +227,7 @@ class Operations(llfuse.Operations):
self.inodes = Inodes()
self.uid = uid
self.gid = gid
-
+
# dict of inode to filehandle
self._filehandles = {}
self._filehandles_counter = 1
@@ -167,7 +243,7 @@ class Operations(llfuse.Operations):
def access(self, inode, mode, ctx):
return True
-
+
def getattr(self, inode):
e = self.inodes[inode]
@@ -218,7 +294,7 @@ class Operations(llfuse.Operations):
return self.getattr(inode)
else:
raise llfuse.FUSEError(errno.ENOENT)
-
+
def open(self, inode, flags):
if inode in self.inodes:
p = self.inodes[inode]
diff --git a/sdk/python/bin/arv-mount b/sdk/python/bin/arv-mount
index 5e773df..991c260 100755
--- a/sdk/python/bin/arv-mount
+++ b/sdk/python/bin/arv-mount
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-from arvados.fuse import *
+from arvados.fuse import *
import arvados
import subprocess
import argparse
@@ -8,14 +8,20 @@ import argparse
if __name__ == '__main__':
# Handle command line parameters
parser = argparse.ArgumentParser(
- description='Mount Keep data under the local filesystem.',
+ description='''Mount Keep data under the local filesystem. By default, if neither
+ --collection or --tags is specified, this mounts as a virtual directory
+ under which all Keep collections are available as subdirectories named
+ with the Keep locator; however directories will not be visible to 'ls'
+ until a program tries to access them.''',
epilog="""
Note: When using the --exec feature, you must either specify the
mountpoint before --exec, or mark the end of your --exec arguments
with "--".
""")
parser.add_argument('mountpoint', type=str, help="""Mount point.""")
- parser.add_argument('--collection', type=str, help="""Collection locator""")
+ parser.add_argument('--collection', type=str, help="""Mount only the specified collection at the mount point.""")
+ parser.add_argument('--tags', type=str, help="""Mount as a virtual directory consisting of subdirectories representing tagged
+collections on the server.""")
parser.add_argument('--debug', action='store_true', help="""Debug mode""")
parser.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
dest="exec_args", metavar=('command', 'args', '...', '--'),
@@ -39,8 +45,8 @@ with "--".
# Enable FUSE debugging (logs each FUSE request)
if args.debug:
- opts += ['debug']
-
+ opts += ['debug']
+
# Initialize the fuse connection
llfuse.init(operations, args.mountpoint, opts)
diff --git a/sdk/python/test_mount.py b/sdk/python/test_mount.py
index ce61598..e8ab08c 100644
--- a/sdk/python/test_mount.py
+++ b/sdk/python/test_mount.py
@@ -46,8 +46,9 @@ class FuseMountTest(unittest.TestCase):
def runTest(self):
# Create the request handler
operations = fuse.Operations(os.getuid(), os.getgid())
- e = operations.inodes.add_entry(fuse.Directory(llfuse.ROOT_INODE))
- operations.inodes.load_collection(e, arvados.CollectionReader(arvados.Keep.get(self.testcollection)))
+ #e = operations.inodes.add_entry(fuse.Directory(llfuse.ROOT_INODE))
+ #operations.inodes.load_collection(e, arvados.CollectionReader(arvados.Keep.get(self.testcollection)))
+ e = operations.inodes.add_entry(fuse.CollectionDirectory(llfuse.ROOT_INODE, operations.inodes, self.testcollection))
self.mounttmp = tempfile.mkdtemp()
@@ -74,20 +75,20 @@ class FuseMountTest(unittest.TestCase):
d4 = os.listdir(os.path.join(self.mounttmp, 'dir2/dir3'))
d4.sort()
self.assertEqual(d4, ['thing7.txt', 'thing8.txt'])
-
+
files = {'thing1.txt': 'data 1',
'thing2.txt': 'data 2',
'dir1/thing3.txt': 'data 3',
'dir1/thing4.txt': 'data 4',
'dir2/thing5.txt': 'data 5',
- 'dir2/thing6.txt': 'data 6',
+ 'dir2/thing6.txt': 'data 6',
'dir2/dir3/thing7.txt': 'data 7',
'dir2/dir3/thing8.txt': 'data 8'}
for k, v in files.items():
with open(os.path.join(self.mounttmp, k)) as f:
self.assertEqual(f.read(), v)
-
+
def tearDown(self):
# llfuse.close is buggy, so use fusermount instead.
@@ -135,14 +136,14 @@ class FuseMagicTest(unittest.TestCase):
d3 = os.listdir(self.mounttmp)
d3.sort()
self.assertEqual(d3, [self.testcollection])
-
+
files = {}
files[os.path.join(self.mounttmp, self.testcollection, 'thing1.txt')] = 'data 1'
for k, v in files.items():
with open(os.path.join(self.mounttmp, k)) as f:
self.assertEqual(f.read(), v)
-
+
def tearDown(self):
# llfuse.close is buggy, so use fusermount instead.
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list