[ARVADOS] updated: 9d80d31df8ed8251651f851bc9b6ccf12f182c70

git at public.curoverse.com git at public.curoverse.com
Tue Nov 24 17:36:43 EST 2015


Summary of changes:
 services/fuse/arvados_fuse/fusefile.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

  discards  b459c9449b1a72a5df2ceeabe2e6477c5a81c2cd (commit)
  discards  128f2f47bb8d6ae97d73188fcfff337f960c299a (commit)
       via  9d80d31df8ed8251651f851bc9b6ccf12f182c70 (commit)
       via  4c0d3027e0297df45e01dd7ededddb3cfecb74fd (commit)

This update added new revisions after undoing existing revisions.  That is
to say, the old revision is not a strict subset of the new revision.  This
situation occurs when you --force push a change and generate a repository
containing something like this:

 * -- * -- B -- O -- O -- O (b459c9449b1a72a5df2ceeabe2e6477c5a81c2cd)
            \
             N -- N -- N (9d80d31df8ed8251651f851bc9b6ccf12f182c70)

When this happens we assume that you've already had alert emails for all
of the O revisions, and so we here report only the revisions in the N
branch from the common base, B.

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 9d80d31df8ed8251651f851bc9b6ccf12f182c70
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Nov 24 13:45:02 2015 -0500

    7751: Fix race by telling fuse not to cache the .arvados#collection dirent.

diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py
index b7c0091..65f117b 100644
--- a/services/fuse/arvados_fuse/__init__.py
+++ b/services/fuse/arvados_fuse/__init__.py
@@ -407,8 +407,8 @@ class Operations(llfuse.Operations):
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
-        entry.entry_timeout = 60
-        entry.attr_timeout = 60
+        entry.entry_timeout = 60 if e.allow_dirent_cache else 0
+        entry.attr_timeout = 60 if e.allow_attr_cache else 0
 
         entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
         if isinstance(e, Directory):
diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py
index ec2d47a..2075741 100644
--- a/services/fuse/arvados_fuse/fresh.py
+++ b/services/fuse/arvados_fuse/fresh.py
@@ -67,6 +67,8 @@ class FreshBase(object):
         self.cache_priority = None
         self.cache_size = 0
         self.cache_uuid = None
+        self.allow_attr_cache = True
+        self.allow_dirent_cache = True
 
     # Mark the value as stale
     def invalidate(self):
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index d4c075a..e731327 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -112,6 +112,13 @@ class FuncToJSONFile(StringFile):
         super(FuncToJSONFile, self).__init__(parent_inode, "", 0)
         self.func = func
 
+        # invalidate_inode() and invalidate_entry() are asynchronous
+        # with no callback to wait for. In order to guarantee
+        # userspace programs don't get stale data that was generated
+        # before the last invalidate(), we must disallow dirent
+        # caching entirely.
+        self.allow_dirent_cache = False
+
     def size(self):
         self._update()
         return super(FuncToJSONFile, self).size()
diff --git a/services/fuse/tests/test_tmp_collection.py b/services/fuse/tests/test_tmp_collection.py
index e403a2c..60eba1b 100644
--- a/services/fuse/tests/test_tmp_collection.py
+++ b/services/fuse/tests/test_tmp_collection.py
@@ -110,12 +110,15 @@ class TmpCollectionTest(IntegrationTest):
              r'^\. 37b51d194a7513e45b56f6524f2d51f2\+3(\+\S+)? acbd18db4cc2f85cedef654fccc4a4d8\+3(\+\S+)? 0:3:bar 3:3:foo\n$'),
             ('foo', None,
              r'^\. 37b51d194a7513e45b56f6524f2d51f2\+3(\+\S+)? 0:3:bar\n$'),
+            ('bar', None,
+             r'^$'),
         ]
-        for fn, content, expect in ops:
-            path = os.path.join(tmpdir, fn)
-            if content is None:
-                os.unlink(path)
-            else:
-                with open(path, 'w') as f:
-                    f.write(content)
-            self.assertRegexpMatches(current_manifest(tmpdir), expect)
+        for _ in range(10):
+            for fn, content, expect in ops:
+                path = os.path.join(tmpdir, fn)
+                if content is None:
+                    os.unlink(path)
+                else:
+                    with open(path, 'w') as f:
+                        f.write(content)
+                self.assertRegexpMatches(current_manifest(tmpdir), expect)

commit 4c0d3027e0297df45e01dd7ededddb3cfecb74fd
Author: Tom Clegg <tom at curoverse.com>
Date:   Tue Nov 24 13:43:42 2015 -0500

    7751: Refactor TmpCollectionDirectory: generate .arvados#collection less often.

diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
index 04c2d50..a2135b3 100644
--- a/services/fuse/arvados_fuse/fusedir.py
+++ b/services/fuse/arvados_fuse/fusedir.py
@@ -8,8 +8,9 @@ import functools
 import threading
 from apiclient import errors as apiclient_errors
 import errno
+import time
 
-from fusefile import StringFile, ObjectFile, FuseArvadosFile
+from fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile
 from fresh import FreshBase, convertTime, use_counter, check_update
 
 import arvados.collection
@@ -483,43 +484,36 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     job output.
     """
 
+    class UnsaveableCollection(arvados.collection.Collection):
+        def save(self):
+            pass
+        def save_new(self):
+            pass
+
     def __init__(self, parent_inode, inodes, api_client, num_retries):
-        collection = arvados.collection.Collection(
+        collection = self.UnsaveableCollection(
             api_client=api_client,
             keep_client=api_client.keep)
-        collection.save = self._commit_collection
-        collection.save_new = self._commit_collection
         super(TmpCollectionDirectory, self).__init__(
             parent_inode, inodes, collection)
         self.collection_record_file = None
-        self._subscribed = False
-        self._update_collection_record()
-
-    def update(self, *args, **kwargs):
-        if not self._subscribed:
-            with llfuse.lock_released:
-                self.populate(self.mtime())
-            self._subscribed = True
-
-    @use_counter
-    def _commit_collection(self):
-        """Commit the data blocks, but don't save the collection to API.
+        self.populate(self.mtime())
 
-        Update the content of the special .arvados#collection file, if
-        it has been instantiated.
-        """
-        self.collection.flush()
-        self._update_collection_record()
-        if self.collection_record_file is not None:
-            self.collection_record_file.update(self.collection_record)
+    def on_event(self, *args, **kwargs):
+        super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
+        if self.collection_record_file:
+            with llfuse.lock:
+                self.collection_record_file.invalidate()
             self.inodes.invalidate_inode(self.collection_record_file.inode)
+            _logger.debug("%s invalidated collection record", self)
 
-    def _update_collection_record(self):
-        self.collection_record = {
-            "uuid": None,
-            "manifest_text": self.collection.manifest_text(),
-            "portable_data_hash": self.collection.portable_data_hash(),
-        }
+    def collection_record(self):
+        with llfuse.lock_released:
+            return {
+                "uuid": None,
+                "manifest_text": self.collection.manifest_text(),
+                "portable_data_hash": self.collection.portable_data_hash(),
+            }
 
     def __contains__(self, k):
         return (k == '.arvados#collection' or
@@ -529,18 +523,26 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
     def __getitem__(self, item):
         if item == '.arvados#collection':
             if self.collection_record_file is None:
-                self.collection_record_file = ObjectFile(
+                self.collection_record_file = FuncToJSONFile(
                     self.inode, self.collection_record)
                 self.inodes.add_entry(self.collection_record_file)
             return self.collection_record_file
         return super(TmpCollectionDirectory, self).__getitem__(item)
 
+    def persisted(self):
+        return False
+
     def writable(self):
         return True
 
     def finalize(self):
         self.collection.stop_threads()
 
+    def invalidate(self):
+        if self.collection_record_file:
+            self.collection_record_file.invalidate()
+        super(TmpCollectionDirectory, self).invalidate()
+
 
 class MagicDirectory(Directory):
     """A special directory that logically contains the set of all extant keep locators.
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
index 4d472cf..d4c075a 100644
--- a/services/fuse/arvados_fuse/fusefile.py
+++ b/services/fuse/arvados_fuse/fusefile.py
@@ -1,7 +1,8 @@
-import logging
-import re
 import json
 import llfuse
+import logging
+import re
+import time
 
 from fresh import FreshBase, convertTime
 
@@ -99,3 +100,30 @@ class ObjectFile(StringFile):
 
     def persisted(self):
         return True
+
+
+class FuncToJSONFile(StringFile):
+    """File content is the return value of a given function, encoded as JSON.
+
+    The function is called at the time the file is read. The result is
+    cached until invalidate() is called.
+    """
+    def __init__(self, parent_inode, func):
+        super(FuncToJSONFile, self).__init__(parent_inode, "", 0)
+        self.func = func
+
+    def size(self):
+        self._update()
+        return super(FuncToJSONFile, self).size()
+
+    def readfrom(self, *args, **kwargs):
+        self._update()
+        return super(FuncToJSONFile, self).readfrom(*args, **kwargs)
+
+    def _update(self):
+        if not self.stale():
+            return
+        self._mtime = time.time()
+        obj = self.func()
+        self.contents = json.dumps(obj, indent=4, sort_keys=True) + "\n"
+        self.fresh()

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list