[ARVADOS] created: e47fe8663303df51081a77646e061b11aa892df1

Git user git at public.curoverse.com
Wed Mar 22 17:55:45 EDT 2017


        at  e47fe8663303df51081a77646e061b11aa892df1 (commit)


commit e47fe8663303df51081a77646e061b11aa892df1
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Mar 22 17:45:32 2017 -0400

    10699: Delete >2-day old cache entries.

diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index b324722..d1263e2 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -137,7 +137,7 @@ def http_cache(data_type):
         util.mkdir_dash_p(path)
     except OSError:
         path = None
-    return cache.SafeHTTPCache(path)
+    return cache.SafeHTTPCache(path, max_age=60*60*24*2)
 
 def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
     """Return an apiclient Resources object for an Arvados instance.
diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
index 6ab6b16..7a557e5 100644
--- a/sdk/python/arvados/cache.py
+++ b/sdk/python/arvados/cache.py
@@ -2,10 +2,31 @@ import errno
 import md5
 import os
 import tempfile
+import time
 
 class SafeHTTPCache(object):
-    def __init__(self, path=None):
+    """Thread-safe replacement for httplib2.FileCache"""
+
+    def __init__(self, path=None, max_age=None):
         self._dir = path
+        if max_age is not None:
+            try:
+                self._clean(threshold=time.time() - max_age)
+            except:
+                pass
+
+    def _clean(self, threshold=0):
+        for ent in os.listdir(self._dir):
+            fnm = os.path.join(self._dir, ent)
+            if os.path.isdir(fnm):
+                continue
+            stat = os.lstat(fnm)
+            if stat.st_mtime < threshold:
+                try:
+                    os.unlink(fnm)
+                except OSError as err:
+                    if err.errno != errno.ENOENT:
+                        raise
 
     def __str__(self):
         return self._dir
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
index ce97a17..9697e47 100644
--- a/sdk/python/tests/test_cache.py
+++ b/sdk/python/tests/test_cache.py
@@ -37,7 +37,7 @@ class CacheTest(unittest.TestCase):
         shutil.rmtree(self._dir)
 
     def test_cache_crud(self):
-        c = arvados.cache.SafeHTTPCache(self._dir)
+        c = arvados.cache.SafeHTTPCache(self._dir, max_age=0)
         url = 'https://example.com/foo?bar=baz'
         data1 = _random(256)
         data2 = _random(128)

commit f0a34ecb9b41b7d4e35575803ba3dc283f369724
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Mar 22 17:44:52 2017 -0400

    10669: Bust old (possibly broken) cache entries by naming our files *.tmp

diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
index c4e4c09..6ab6b16 100644
--- a/sdk/python/arvados/cache.py
+++ b/sdk/python/arvados/cache.py
@@ -11,7 +11,7 @@ class SafeHTTPCache(object):
         return self._dir
 
     def _filename(self, url):
-        return os.path.join(self._dir, md5.new(url).hexdigest())
+        return os.path.join(self._dir, md5.new(url).hexdigest()+'.tmp')
 
     def get(self, url):
         filename = self._filename(url)

commit 8d2dd8003b6e865033e372b5db76fc2244378964
Author: Tom Clegg <tom at curoverse.com>
Date:   Wed Mar 22 15:16:37 2017 -0400

    10669: Robust cache for httplib2

diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index ccf16a5..b324722 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -15,6 +15,7 @@ from apiclient import errors as apiclient_errors
 import config
 import errors
 import util
+import cache
 
 _logger = logging.getLogger('arvados.api')
 
@@ -136,7 +137,7 @@ def http_cache(data_type):
         util.mkdir_dash_p(path)
     except OSError:
         path = None
-    return path
+    return cache.SafeHTTPCache(path)
 
 def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
     """Return an apiclient Resources object for an Arvados instance.
diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
new file mode 100644
index 0000000..c4e4c09
--- /dev/null
+++ b/sdk/python/arvados/cache.py
@@ -0,0 +1,50 @@
+import errno
+import md5
+import os
+import tempfile
+
+class SafeHTTPCache(object):
+    def __init__(self, path=None):
+        self._dir = path
+
+    def __str__(self):
+        return self._dir
+
+    def _filename(self, url):
+        return os.path.join(self._dir, md5.new(url).hexdigest())
+
+    def get(self, url):
+        filename = self._filename(url)
+        try:
+            with open(filename, 'rb') as f:
+                return f.read()
+        except IOError, OSError:
+            return None
+
+    def set(self, url, content):
+        try:
+            fd, tempname = tempfile.mkstemp(dir=self._dir)
+        except:
+            return None
+        try:
+            try:
+                f = os.fdopen(fd, 'w')
+            except:
+                os.close(fd)
+                raise
+            try:
+                f.write(content)
+            finally:
+                f.close()
+            os.rename(tempname, self._filename(url))
+            tempname = None
+        finally:
+            if tempname:
+                os.unlink(tempname)
+
+    def delete(self, url):
+        try:
+            os.unlink(self._filename(url))
+        except OSError as err:
+            if err.errno != errno.ENOENT:
+                raise
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index da35f4f..d10e60c 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -239,8 +239,9 @@ def run(leave_running_atexit=False):
     # This will clear cached docs that belong to other processes (like
     # concurrent test suites) even if they're still running. They should
     # be able to tolerate that.
-    for fn in glob.glob(os.path.join(arvados.http_cache('discovery'),
-                                     '*,arvados,v1,rest,*')):
+    for fn in glob.glob(os.path.join(
+            str(arvados.http_cache('discovery')),
+            '*,arvados,v1,rest,*')):
         os.unlink(fn)
 
     pid_file = _pidfile('api')
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
new file mode 100644
index 0000000..ce97a17
--- /dev/null
+++ b/sdk/python/tests/test_cache.py
@@ -0,0 +1,62 @@
+import md5
+import shutil
+import random
+import tempfile
+import threading
+import unittest
+
+import arvados.cache
+
+
+def _random(n):
+    return bytearray(random.getrandbits(8) for _ in xrange(n))
+
+
+class CacheTestThread(threading.Thread):
+    def __init__(self, dir):
+        super(CacheTestThread, self).__init__()
+        self._dir = dir
+
+    def run(self):
+        c = arvados.cache.SafeHTTPCache(self._dir)
+        url = 'http://example.com/foo'
+        for x in range(16):
+            data_in = _random(128)
+            data_in = md5.new(data_in).hexdigest() + "\n" + str(data_in)
+            c.set(url, data_in)
+            data_out = c.get(url)
+            digest, content = data_out.split("\n", 1)
+            self.ok = (digest == md5.new(content).hexdigest())
+
+
+class CacheTest(unittest.TestCase):
+    def setUp(self):
+        self._dir = tempfile.mkdtemp()
+
+    def tearDown(self):
+        shutil.rmtree(self._dir)
+
+    def test_cache_crud(self):
+        c = arvados.cache.SafeHTTPCache(self._dir)
+        url = 'https://example.com/foo?bar=baz'
+        data1 = _random(256)
+        data2 = _random(128)
+        self.assertEqual(None, c.get(url))
+        c.delete(url)
+        c.set(url, data1)
+        self.assertEqual(data1, c.get(url))
+        c.delete(url)
+        self.assertEqual(None, c.get(url))
+        c.set(url, data1)
+        c.set(url, data2)
+        self.assertEqual(data2, c.get(url))
+
+    def test_cache_threads(self):
+        threads = []
+        for _ in range(64):
+            t = CacheTestThread(dir=self._dir)
+            t.start()
+            threads.append(t)
+        for t in threads:
+            t.join()
+            self.assertTrue(t.ok)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list