[ARVADOS] created: e47fe8663303df51081a77646e061b11aa892df1
Git user
git at public.curoverse.com
Wed Mar 22 17:55:45 EDT 2017
at e47fe8663303df51081a77646e061b11aa892df1 (commit)
commit e47fe8663303df51081a77646e061b11aa892df1
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Mar 22 17:45:32 2017 -0400
10699: Delete >2-day old cache entries.
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index b324722..d1263e2 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -137,7 +137,7 @@ def http_cache(data_type):
util.mkdir_dash_p(path)
except OSError:
path = None
- return cache.SafeHTTPCache(path)
+ return cache.SafeHTTPCache(path, max_age=60*60*24*2)
def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
"""Return an apiclient Resources object for an Arvados instance.
diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
index 6ab6b16..7a557e5 100644
--- a/sdk/python/arvados/cache.py
+++ b/sdk/python/arvados/cache.py
@@ -2,10 +2,31 @@ import errno
import md5
import os
import tempfile
+import time
class SafeHTTPCache(object):
- def __init__(self, path=None):
+ """Thread-safe replacement for httplib2.FileCache"""
+
+ def __init__(self, path=None, max_age=None):
self._dir = path
+ if max_age is not None:
+ try:
+ self._clean(threshold=time.time() - max_age)
+ except:
+ pass
+
+ def _clean(self, threshold=0):
+ for ent in os.listdir(self._dir):
+ fnm = os.path.join(self._dir, ent)
+ if os.path.isdir(fnm):
+ continue
+ stat = os.lstat(fnm)
+ if stat.st_mtime < threshold:
+ try:
+ os.unlink(fnm)
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ raise
def __str__(self):
return self._dir
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
index ce97a17..9697e47 100644
--- a/sdk/python/tests/test_cache.py
+++ b/sdk/python/tests/test_cache.py
@@ -37,7 +37,7 @@ class CacheTest(unittest.TestCase):
shutil.rmtree(self._dir)
def test_cache_crud(self):
- c = arvados.cache.SafeHTTPCache(self._dir)
+ c = arvados.cache.SafeHTTPCache(self._dir, max_age=0)
url = 'https://example.com/foo?bar=baz'
data1 = _random(256)
data2 = _random(128)
commit f0a34ecb9b41b7d4e35575803ba3dc283f369724
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Mar 22 17:44:52 2017 -0400
10669: Bust old (possibly broken) cache entries by naming our files *.tmp
diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
index c4e4c09..6ab6b16 100644
--- a/sdk/python/arvados/cache.py
+++ b/sdk/python/arvados/cache.py
@@ -11,7 +11,7 @@ class SafeHTTPCache(object):
return self._dir
def _filename(self, url):
- return os.path.join(self._dir, md5.new(url).hexdigest())
+ return os.path.join(self._dir, md5.new(url).hexdigest()+'.tmp')
def get(self, url):
filename = self._filename(url)
commit 8d2dd8003b6e865033e372b5db76fc2244378964
Author: Tom Clegg <tom at curoverse.com>
Date: Wed Mar 22 15:16:37 2017 -0400
10669: Robust cache for httplib2
diff --git a/sdk/python/arvados/api.py b/sdk/python/arvados/api.py
index ccf16a5..b324722 100644
--- a/sdk/python/arvados/api.py
+++ b/sdk/python/arvados/api.py
@@ -15,6 +15,7 @@ from apiclient import errors as apiclient_errors
import config
import errors
import util
+import cache
_logger = logging.getLogger('arvados.api')
@@ -136,7 +137,7 @@ def http_cache(data_type):
util.mkdir_dash_p(path)
except OSError:
path = None
- return path
+ return cache.SafeHTTPCache(path)
def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
"""Return an apiclient Resources object for an Arvados instance.
diff --git a/sdk/python/arvados/cache.py b/sdk/python/arvados/cache.py
new file mode 100644
index 0000000..c4e4c09
--- /dev/null
+++ b/sdk/python/arvados/cache.py
@@ -0,0 +1,50 @@
+import errno
+import md5
+import os
+import tempfile
+
+class SafeHTTPCache(object):
+ def __init__(self, path=None):
+ self._dir = path
+
+ def __str__(self):
+ return self._dir
+
+ def _filename(self, url):
+ return os.path.join(self._dir, md5.new(url).hexdigest())
+
+ def get(self, url):
+ filename = self._filename(url)
+ try:
+ with open(filename, 'rb') as f:
+ return f.read()
+ except IOError, OSError:
+ return None
+
+ def set(self, url, content):
+ try:
+ fd, tempname = tempfile.mkstemp(dir=self._dir)
+ except:
+ return None
+ try:
+ try:
+ f = os.fdopen(fd, 'w')
+ except:
+ os.close(fd)
+ raise
+ try:
+ f.write(content)
+ finally:
+ f.close()
+ os.rename(tempname, self._filename(url))
+ tempname = None
+ finally:
+ if tempname:
+ os.unlink(tempname)
+
+ def delete(self, url):
+ try:
+ os.unlink(self._filename(url))
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ raise
diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py
index da35f4f..d10e60c 100644
--- a/sdk/python/tests/run_test_server.py
+++ b/sdk/python/tests/run_test_server.py
@@ -239,8 +239,9 @@ def run(leave_running_atexit=False):
# This will clear cached docs that belong to other processes (like
# concurrent test suites) even if they're still running. They should
# be able to tolerate that.
- for fn in glob.glob(os.path.join(arvados.http_cache('discovery'),
- '*,arvados,v1,rest,*')):
+ for fn in glob.glob(os.path.join(
+ str(arvados.http_cache('discovery')),
+ '*,arvados,v1,rest,*')):
os.unlink(fn)
pid_file = _pidfile('api')
diff --git a/sdk/python/tests/test_cache.py b/sdk/python/tests/test_cache.py
new file mode 100644
index 0000000..ce97a17
--- /dev/null
+++ b/sdk/python/tests/test_cache.py
@@ -0,0 +1,62 @@
+import md5
+import shutil
+import random
+import tempfile
+import threading
+import unittest
+
+import arvados.cache
+
+
+def _random(n):
+ return bytearray(random.getrandbits(8) for _ in xrange(n))
+
+
+class CacheTestThread(threading.Thread):
+ def __init__(self, dir):
+ super(CacheTestThread, self).__init__()
+ self._dir = dir
+
+ def run(self):
+ c = arvados.cache.SafeHTTPCache(self._dir)
+ url = 'http://example.com/foo'
+ for x in range(16):
+ data_in = _random(128)
+ data_in = md5.new(data_in).hexdigest() + "\n" + str(data_in)
+ c.set(url, data_in)
+ data_out = c.get(url)
+ digest, content = data_out.split("\n", 1)
+ self.ok = (digest == md5.new(content).hexdigest())
+
+
+class CacheTest(unittest.TestCase):
+ def setUp(self):
+ self._dir = tempfile.mkdtemp()
+
+ def tearDown(self):
+ shutil.rmtree(self._dir)
+
+ def test_cache_crud(self):
+ c = arvados.cache.SafeHTTPCache(self._dir)
+ url = 'https://example.com/foo?bar=baz'
+ data1 = _random(256)
+ data2 = _random(128)
+ self.assertEqual(None, c.get(url))
+ c.delete(url)
+ c.set(url, data1)
+ self.assertEqual(data1, c.get(url))
+ c.delete(url)
+ self.assertEqual(None, c.get(url))
+ c.set(url, data1)
+ c.set(url, data2)
+ self.assertEqual(data2, c.get(url))
+
+ def test_cache_threads(self):
+ threads = []
+ for _ in range(64):
+ t = CacheTestThread(dir=self._dir)
+ t.start()
+ threads.append(t)
+ for t in threads:
+ t.join()
+ self.assertTrue(t.ok)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list