[ARVADOS] updated: cd41c68c201f5307ec3372fa141842e1be695a98
git at public.curoverse.com
git at public.curoverse.com
Wed Sep 3 11:39:14 EDT 2014
Summary of changes:
services/fuse/arvados_fuse/__init__.py | 196 +++++++++++++++++++++++----------
1 file changed, 137 insertions(+), 59 deletions(-)
via cd41c68c201f5307ec3372fa141842e1be695a98 (commit)
via a9e8119ed4931df57eef92121bc1dd6e65fa8783 (commit)
from 370d158d6cfd9a610e68946cc22998543ef10e3d (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit cd41c68c201f5307ec3372fa141842e1be695a98
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 3 11:39:09 2014 -0400
3644: Collections work again, added .portable_data_hash and .manifest_text
virtual files to detect if a directory is a collection.
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 3ce84df..a80962b 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -36,8 +36,8 @@ def sanitize_filename(dirty):
continue
fn += c
- # strip whitespace and leading - or ~
- stripped = fn.strip().lstrip("-~")
+ # strip leading - or ~ and leading/trailing whitespace
+ stripped = fn.lstrip("-~ ").rstrip()
if len(stripped) > 0:
return stripped
else:
@@ -78,10 +78,12 @@ class FreshBase(object):
class File(FreshBase):
'''Base for file objects.'''
- def __init__(self, parent_inode):
+ def __init__(self, parent_inode, _ctime=0, _mtime=0):
super(File, self).__init__()
self.inode = None
self.parent_inode = parent_inode
+ self._ctime = _ctime
+ self._mtime = _mtime
def size(self):
return 0
@@ -89,14 +91,19 @@ class File(FreshBase):
def readfrom(self, off, size):
return ''
+ def ctime(self):
+ return self._ctime
+
+ def mtime(self):
+ return self._mtime
+
class StreamReaderFile(File):
'''Wraps a StreamFileReader as a file.'''
- def __init__(self, parent_inode, reader, collection):
- super(StreamReaderFile, self).__init__(parent_inode)
+ def __init__(self, parent_inode, reader, _ctime, _mtime):
+ super(StreamReaderFile, self).__init__(parent_inode, _ctime, _mtime)
self.reader = reader
- self.collection = collection
def size(self):
return self.reader.size()
@@ -107,27 +114,26 @@ class StreamReaderFile(File):
def stale(self):
return False
- def ctime(self):
- return convertTime(self.collection["created_at"])
- def mtime(self):
- return convertTime(self.collection["modified_at"])
+class StringFile(File):
+ '''Wrap a simple string as a file'''
+ def __init__(self, parent_inode, contents, _ctime, _mtime):
+ super(StringFile, self).__init__(parent_inode, _ctime, _mtime)
+ self.contents = contents
+ def size(self):
+ return len(self.contents)
+
+ def readfrom(self, off, size):
+ return self.contents[off:(off+size)]
-class ObjectFile(File):
- '''Wraps a dict as a serialized json object.'''
+class ObjectFile(StringFile):
+ '''Wrap a dict as a serialized json object.'''
def __init__(self, parent_inode, contents):
- super(ObjectFile, self).__init__(parent_inode)
+ super(ObjectFile, self).__init__(parent_inode, json.dumps(self.contentsdict, indent=4, sort_keys=True))
self.contentsdict = contents
self.uuid = self.contentsdict['uuid']
- self.contents = json.dumps(self.contentsdict, indent=4, sort_keys=True)
-
- def size(self):
- return len(self.contents)
-
- def readfrom(self, off, size):
- return self.contents[off:(off+size)]
class Directory(FreshBase):
@@ -240,17 +246,31 @@ class CollectionDirectory(Directory):
self.inodes = inodes
self.api = api
self.collection_locator = collection_locator
- self.portable_data_hash = None
- self.collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
+ self.manifest_text_file = None
+ self.pdh_file = None
+ self.collection_object = None
def same(self, i):
return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
def update(self):
try:
- self.collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
- if self.portable_data_hash != self.collection_object["portable_data_hash"]:
- self.portable_data_hash = self.collection_object["portable_data_hash"]
+ new_collection_object = self.api.collections().get(uuid=self.collection_locator).execute()
+ if "portable_data_hash" not in new_collection_object:
+ new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
+
+ if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
+ self.collection_object = new_collection_object
+
+ if self.manifest_text_file is not None:
+ self.manifest_text_file.contents = self.collection_object["manifest_text"]
+ self.manifest_text_file._ctime = self.ctime()
+ self.manifest_text_file._mtime = self.mtime()
+ if self.pdh_file is not None:
+ self.pdh_file.contents = self.collection_object["portable_data_hash"]
+ self.pdh_file._ctime = self.ctime()
+ self.pdh_file._mtime = self.mtime()
+
self.clear()
collection = arvados.CollectionReader(self.collection_object["manifest_text"], self.api)
for s in collection.all_streams():
@@ -262,18 +282,43 @@ class CollectionDirectory(Directory):
cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
cwd = cwd._entries[partname]
for k, v in s.files().items():
- cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.collection_object))
+ cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.ctime(), self.mtime()))
self.fresh()
return True
except Exception as detail:
- _logger.debug("arv-mount %s: error: %s",
- self.collection_locator, detail)
+ _logger.error("arv-mount %s: error", self.collection_locator)
+ _logger.exception(detail)
return False
+ def __getitem__(self, item):
+ self.checkupdate()
+ if item == '.manifest_text':
+ if self.manifest_text_file is None:
+ self.manifest_text_file = StringFile(self.inode, self.collection_object["manifest_text"], self.ctime(), self.mtime())
+ self.inodes.add_entry(self.manifest_text_file)
+ return self.manifest_text_file
+ elif item == '.portable_data_hash':
+ if self.pdh_file is None:
+ self.pdh_file = StringFile(self.inode, self.collection_object["portable_data_hash"], self.ctime(), self.mtime())
+ print self.ctime
+ print self.pdh_file._ctime
+ self.inodes.add_entry(self.pdh_file)
+ return self.pdh_file
+ else:
+ return super(CollectionDirectory, self).__getitem__(item)
+
+ def __contains__(self, k):
+ if k == '.manifest_text' or '.portable_data_hash':
+ return True
+ else:
+ return super(CollectionDirectory, self).__contains__(k)
+
def ctime(self):
+ self.checkupdate()
return convertTime(self.collection_object["created_at"])
def mtime(self):
+ self.checkupdate()
return convertTime(self.collection_object["modified_at"])
class MagicDirectory(Directory):
commit a9e8119ed4931df57eef92121bc1dd6e65fa8783
Author: Peter Amstutz <peter.amstutz at curoverse.com>
Date: Wed Sep 3 10:26:50 2014 -0400
3644: HomeDirectory and ProjectDirectories work, added filename sanitization.
diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index 0be4c44..3ce84df 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -24,6 +24,26 @@ _logger = logging.getLogger('arvados.arvados_fuse')
def convertTime(t):
return calendar.timegm(time.strptime(t, "%Y-%m-%dT%H:%M:%SZ"))
+def sanitize_filename(dirty):
+ # http://www.dwheeler.com/essays/fixing-unix-linux-filenames.html
+ if dirty is None:
+ return None
+
+ fn = ""
+ for c in dirty:
+ if (c >= '\x00' and c <= '\x1f') or c == '\x7f' or c == '/':
+ # skip control characters and /
+ continue
+ fn += c
+
+ # strip whitespace and leading - or ~
+ stripped = fn.strip().lstrip("-~")
+ if len(stripped) > 0:
+ return stripped
+ else:
+ return None
+
+
class FreshBase(object):
'''Base class for maintaining fresh/stale state to determine when to update.'''
def __init__(self):
@@ -160,33 +180,44 @@ class Directory(FreshBase):
return k in self._entries
def merge(self, items, fn, same, new_entry):
- '''Helper method for updating the contents of the directory.
+ '''Helper method for updating the contents of the directory. Takes a list
+ describing the new contents of the directory, reuses entries that are
+ the same in both the old and new lists, creates new entries, and deletes
+ old entries missing from the new list.
- items: array with new directory contents
+ items: iterable with new directory contents
fn: function to take an entry in 'items' and return the desired file or
- directory name
+ directory name, or None if this entry should be skipped
- same: function to compare an existing entry with an entry in the items
- list to determine whether to keep the existing entry.
+ same: function to compare an existing entry (a File or Directory
+ object) with an entry in the items list to determine whether to keep
+ the existing entry.
+
+ new_entry: function to create a new directory entry (File or Directory
+ object) from an entry in the items list.
- new_entry: function to create a new directory entry from array entry.
'''
oldentries = self._entries
self._entries = {}
for i in items:
- n = fn(i)
- if n in oldentries and same(oldentries[n], i):
- self._entries[n] = oldentries[n]
- del oldentries[n]
- else:
- ent = new_entry(i)
- if ent is not None:
- self._entries[n] = self.inodes.add_entry(ent)
- for n in oldentries:
- llfuse.invalidate_entry(self.inode, str(n))
- self.inodes.del_entry(oldentries[n])
+ name = sanitize_filename(fn(i))
+ if name:
+ if name in oldentries and same(oldentries[name], i):
+ # move existing directory entry over
+ self._entries[name] = oldentries[name]
+ del oldentries[name]
+ else:
+ # create new directory entry
+ ent = new_entry(i)
+ if ent is not None:
+ self._entries[name] = self.inodes.add_entry(ent)
+
+ # delete any other directory entries that were not found in 'items'
+ for i in oldentries:
+ llfuse.invalidate_entry(self.inode, str(i))
+ self.inodes.del_entry(oldentries[i])
self.fresh()
def clear(self):
@@ -226,11 +257,12 @@ class CollectionDirectory(Directory):
cwd = self
for part in s.name().split('/'):
if part != '' and part != '.':
- if part not in cwd._entries:
- cwd._entries[part] = self.inodes.add_entry(Directory(cwd.inode))
- cwd = cwd._entries[part]
+ partname = sanitize_filename(part)
+ if partname not in cwd._entries:
+ cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
+ cwd = cwd._entries[partname]
for k, v in s.files().items():
- cwd._entries[k] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.collection_object))
+ cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.collection_object))
self.fresh()
return True
except Exception as detail:
@@ -349,10 +381,12 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
self.uuid = project_object['uuid']
def createDirectory(self, i):
- if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']) and i['name'] is not None:
+ if re.match(r'[a-z0-9]{5}-4zz18-[a-z0-9]{15}', i['uuid']):
return CollectionDirectory(self.inode, self.inodes, self.api, i['uuid'])
elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']):
return ProjectDirectory(self.inode, self.inodes, self.api, i, self._poll, self._poll_time)
+ elif re.match(r'[a-z0-9]{5}-o0j2j-[a-z0-9]{15}', i['uuid']) and i['head_kind'] == 'arvados#collection':
+ return CollectionDirectory(self.inode, self.inodes, self.api, i['head_uuid'])
#elif re.match(r'[a-z0-9]{5}-8i9sb-[a-z0-9]{15}', i['uuid']):
# return None
#elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
@@ -360,9 +394,6 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
else:
return None
- def contents(self):
- return arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
-
def update(self):
def same(a, i):
if isinstance(a, CollectionDirectory):
@@ -375,11 +406,15 @@ class ProjectDirectory(RecursiveInvalidateDirectory):
if re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', self.uuid):
self.project_object = self.api.groups().get(uuid=self.uuid).execute()
- print self.project_object
elif re.match(r'[a-z0-9]{5}-tpzed-[a-z0-9]{15}', self.uuid):
self.project_object = self.api.users().get(uuid=self.uuid).execute()
- self.merge(self.contents(),
+ contents = arvados.util.list_all(self.api.groups().contents, uuid=self.uuid)
+
+ # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
+ contents += arvados.util.list_all(self.api.links().list, filters=[['tail_uuid', '=', self.uuid], ['link_class', '=', 'name']])
+
+ self.merge(contents,
lambda i: i['name'] if 'name' in i and i['name'] is not None and len(i['name']) > 0 else i['uuid'],
same,
self.createDirectory)
@@ -401,11 +436,11 @@ class HomeDirectory(RecursiveInvalidateDirectory):
self.inodes = inodes
self.api = api
- try:
- arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
- except:
- self._poll = True
- self._poll_time = poll_time
+ # try:
+ # arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
+ # except:
+ # self._poll = True
+ # self._poll_time = poll_time
def update(self):
all_projects = arvados.util.list_all(self.api.groups().list, filters=[['group_class','=','project']])
@@ -445,14 +480,12 @@ class HomeDirectory(RecursiveInvalidateDirectory):
contents[r['name']] = r
try:
- print "start merge"
self.merge(contents.items(),
lambda i: i[0],
lambda a, i: a.uuid == i[1]['uuid'],
lambda i: ProjectDirectory(self.inode, self.inodes, self.api, i[1], poll=self._poll, poll_time=self._poll_time))
except Exception as e:
_logger.exception(e)
- print "done merge"
#def contents(self):
# return self.api.groups().contents(uuid=self.uuid).execute()['items']
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list