[ARVADOS] updated: 4d779096453fd54437df2fdafd682b550e24861f

git at public.curoverse.com git at public.curoverse.com
Sun Feb 2 20:50:25 EST 2014


Summary of changes:
 sdk/python/arvados/keep.py   |    8 +-
 sdk/python/arvados/stream.py |    3 +
 sdk/python/bin/arv-mount     |  158 ++++++++++++++++++++++++++++++++++++++++++
 sdk/python/requirements.txt  |    1 +
 sdk/python/setup.py          |    4 +
 5 files changed, 170 insertions(+), 4 deletions(-)
 create mode 100755 sdk/python/bin/arv-mount

       via  4d779096453fd54437df2fdafd682b550e24861f (commit)
       via  1790ceeb9a977c6aeae9c1a5d8b6dc489bf27275 (commit)
       via  b2bc35d29fe37bf82f314b48880adba51ae1e343 (commit)
      from  7330db33df73dbb204d362253668436cb5caa7b3 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 4d779096453fd54437df2fdafd682b550e24861f
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 2 17:49:24 2014 -0800

    Add arv-mount.

diff --git a/sdk/python/bin/arv-mount b/sdk/python/bin/arv-mount
new file mode 100755
index 0000000..667f36e
--- /dev/null
+++ b/sdk/python/bin/arv-mount
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+
+import argparse
+import hashlib
+import os
+import re
+import string
+import sys
+import logging
+import fuse
+import errno
+import stat
+import arvados
+import time
+
+class KeepMount(fuse.LoggingMixIn, fuse.Operations):
+    'Read-only Keep mount.'
+
+    def __init__(self):
+        self.arv = arvados.api('v1')
+        self.reader = None
+        self.collections = {}
+        self.audited = dict(read={})
+
+    def load_collection(self, uuid):
+        if uuid in self.collections:
+            return
+        now = time.time()
+        reader = arvados.CollectionReader(uuid)
+        files = {}
+        files[''] = dict(
+            stat=dict(
+                st_mode=(stat.S_IFDIR | 0755), st_ctime=now,
+                st_mtime=now, st_atime=now, st_nlink=2))
+        try:
+            for s in reader.all_streams():
+                for f in s.all_files():
+                    path = re.sub(r'^\./', '', os.path.join(s.name(), f.name()))
+                    files[path] = dict(
+                        stat=dict(
+                            st_mode=(stat.S_IFREG | 0444),
+                            st_size=f.size(), st_nlink=1,
+                            st_ctime=now, st_mtime=now, st_atime=now),
+                        arv_file=f)
+                    logger.debug("collection.load: %s: %s" % (uuid, path))
+        except:
+            # TODO: propagate real error, don't assume ENOENT
+            raise fuse.FuseOSError(errno.ENOENT)
+        self.collections[uuid] = dict(reader=reader, files=files)
+        logger.info("collection.load %s" % uuid)
+
+    def setup_reader(self, path):
+        logger.debug("%s", path.split('/'))
+        return True
+
+    def set_args(self, args):
+        self.args = args
+
+    def parse_and_load(self, path):
+        parts = path.split(os.path.sep, 2)
+        while len(parts) < 3:
+            parts += ['']
+        if not re.match(r'[0-9a-f]{32,}(\+\S+?)*', parts[1]):
+            raise fuse.FuseOSError(errno.ENOENT)
+        if self.args.collection != []:
+            if parts[1] not in self.args.collection:
+                raise fuse.FuseOSError(errno.EPERM)
+        self.load_collection(parts[1])
+        return parts[0:3]
+
+    def audit_read(self, uuid):
+        if self.args.audit and uuid not in self.audited['read']:
+            self.audited['read'][uuid] = True
+            logger.info("collection.read %s" % uuid)
+
+    def read(self, path, size, offset, fh):
+        _, uuid, target = self.parse_and_load(path)
+        if (uuid not in self.collections or
+            target not in self.collections[uuid]['files']):
+            raise fuse.FuseOSError(errno.ENOENT)
+        self.audit_read(uuid)
+        f = self.collections[uuid]['files'][target]['arv_file']
+        f.seek(offset)
+        return f.read(size)
+
+    def readdir(self, path, fh):
+        if path == '/':
+            raise fuse.FuseOSError(errno.EPERM)
+        _, uuid, target = self.parse_and_load(path)
+        if uuid not in self.collections:
+            raise fuse.FuseOSError(errno.ENOENT)
+        if target != '' and target[-1] != os.path.sep:
+            target += os.path.sep
+        dirs = {}
+        for filepath in self.collections[uuid]['files']:
+            if filepath != '':
+                logger.debug(filepath)
+                if target == '' or 0 == string.find(filepath, target):
+                    dirs[filepath[len(target):].split(os.path.sep)[0]] = True
+        return ['.', '..'] + dirs.keys()
+
+    def getattr(self, path, fh=None):
+        if path == '/':
+            now = time.time()
+            return dict(st_mode=(stat.S_IFDIR | 0111), st_ctime=now,
+                        st_mtime=now, st_atime=now, st_nlink=2)
+        _, uuid, target = self.parse_and_load(path)
+        if uuid not in self.collections:
+            raise fuse.FuseOSError(errno.ENOENT)
+        if target in self.collections[uuid]['files']:
+            return self.collections[uuid]['files'][target]['stat']
+        for filepath in self.collections[uuid]['files']:
+            if filepath != '':
+                if target == '' or 0 == string.find(filepath, target + '/'):
+                    return self.collections[uuid]['files']['']['stat']
+        raise fuse.FuseOSError(errno.ENOENT)
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='Mount Keep data under the local filesystem.')
+    parser.add_argument('mountpoint', type=str,
+                        help="""
+Mount point.
+""")
+    parser.add_argument('--collection', type=str, action='append', default=[],
+                        help="""
+Collection locator. If none supplied, provide access to all readable
+manifests.
+""")
+    parser.add_argument('--audit', action='store_true',
+                        help="""
+Print the collection uuid on stderr the first time a given collection
+is read.
+""")
+    parser.add_argument('--debug', action='store_true',
+                        help="""
+Print debug messages.
+""")
+    parser.add_argument('--foreground', action='store_true',
+                        help="""
+Run in foreground, instead of detaching and running as a daemon.
+""")
+    args = parser.parse_args()
+    return args
+
+if __name__ == '__main__':
+    args = parse_args()
+    logger = logging.getLogger(os.path.basename(sys.argv[0]))
+    if args.audit:
+        logging.basicConfig(level=logging.INFO)
+    if args.debug:
+        logging.basicConfig(level=logging.DEBUG)
+    mounter = KeepMount()
+    mounter.set_args(args)
+    fuse = fuse.FUSE(mounter,
+                     args.mountpoint,
+                     foreground=args.foreground,
+                     fsname='arv-mount')
diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt
index bbc4718..61d57fe 100644
--- a/sdk/python/requirements.txt
+++ b/sdk/python/requirements.txt
@@ -2,3 +2,4 @@ google-api-python-client==1.2
 httplib2==0.8
 python-gflags==2.0
 urllib3==1.7.1
+fusepy==2.0.2
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
index 99fc9c4..7506931 100644
--- a/sdk/python/setup.py
+++ b/sdk/python/setup.py
@@ -15,9 +15,13 @@ setup(name='arvados-python-client',
       scripts=[
         'bin/arv-get',
         'bin/arv-put',
+        'bin/arv-mount',
         ],
       install_requires=[
         'python-gflags',
         'google-api-python-client',
+        'httplib2',
+        'urllib3',
+        'fusepy',
         ],
       zip_safe=False)

commit 1790ceeb9a977c6aeae9c1a5d8b6dc489bf27275
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 2 17:27:26 2014 -0800

    Add StreamFileReader.seek() method.

diff --git a/sdk/python/arvados/stream.py b/sdk/python/arvados/stream.py
index d61de4d..0d0caee 100644
--- a/sdk/python/arvados/stream.py
+++ b/sdk/python/arvados/stream.py
@@ -55,6 +55,9 @@ class StreamFileReader(object):
                 break
             yield data
 
+    def seek(self, pos):
+        self._filepos = pos
+
     def bunzip2(self, size):
         decompressor = bz2.BZ2Decompressor()
         for chunk in self.readall(size):

commit b2bc35d29fe37bf82f314b48880adba51ae1e343
Author: Tom Clegg <tom at curoverse.com>
Date:   Sun Feb 2 17:26:59 2014 -0800

    Fix scoping for error classes.

diff --git a/sdk/python/arvados/keep.py b/sdk/python/arvados/keep.py
index c9f83bf..b2bf3b4 100644
--- a/sdk/python/arvados/keep.py
+++ b/sdk/python/arvados/keep.py
@@ -22,7 +22,7 @@ global_client_object = None
 
 from api import *
 import config
-import errors
+import arvados.errors
 
 class Keep:
     @staticmethod
@@ -192,7 +192,7 @@ class KeepClient(object):
             except (httplib2.HttpLib2Error, httplib.ResponseNotReady) as e:
                 logging.info("Request fail: GET %s => %s: %s" %
                              (url, type(e), str(e)))
-        raise errors.NotFoundError("Block not found: %s" % expect_hash)
+        raise arvados.errors.NotFoundError("Block not found: %s" % expect_hash)
 
     def put(self, data, **kwargs):
         if 'KEEP_LOCAL_STORE' in os.environ:
@@ -218,7 +218,7 @@ class KeepClient(object):
         have_copies = thread_limiter.done()
         if have_copies == want_copies:
             return (data_hash + '+' + str(len(data)))
-        raise errors.KeepWriteError(
+        raise arvados.errors.KeepWriteError(
             "Write fail for %s: wanted %d but wrote %d" %
             (data_hash, want_copies, have_copies))
 
@@ -243,7 +243,7 @@ class KeepClient(object):
     def local_store_get(locator):
         r = re.search('^([0-9a-f]{32,})', locator)
         if not r:
-            raise errors.NotFoundError(
+            raise arvados.errors.NotFoundError(
                 "Invalid data locator: '%s'" % locator)
         if r.group(0) == config.EMPTY_BLOCK_LOCATOR.split('+')[0]:
             return ''

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list