[ARVADOS] updated: 1.1.4-327-g739b1b9
Git user
git at public.curoverse.com
Fri May 25 16:47:15 EDT 2018
Summary of changes:
sdk/cwl/arvados_cwl/http.py | 58 +++++----
sdk/cwl/tests/test_http.py | 286 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 321 insertions(+), 23 deletions(-)
create mode 100644 sdk/cwl/tests/test_http.py
via 739b1b9ec3662f988ad09509bcc933ce5c23c4e8 (commit)
from 57e511e900aa1eb175fa7f308b09516ad83492b4 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 739b1b9ec3662f988ad09509bcc933ce5c23c4e8
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Fri May 25 16:45:12 2018 -0400
11162: Add tests for http_to_keep.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index 1ee1607..32fc1cf 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -11,7 +11,8 @@ import logging
logger = logging.getLogger('arvados.cwl-runner')
def my_formatdate(dt):
- return email.utils.formatdate(timeval=time.mktime(now.timetuple()), localtime=False, usegmt=True)
+ return email.utils.formatdate(timeval=time.mktime(dt.timetuple()),
+ localtime=False, usegmt=True)
def my_parsedate(text):
parsed = email.utils.parsedate(text)
@@ -20,7 +21,7 @@ def my_parsedate(text):
else:
return datetime.datetime(1970, 1, 1)
-def fresh_cache(url, properties):
+def fresh_cache(url, properties, now):
pr = properties[url]
expires = None
@@ -45,20 +46,20 @@ def fresh_cache(url, properties):
if not expires:
return False
- return (datetime.datetime.utcnow() < expires)
+ return (now < expires)
-def remember_headers(url, properties, headers):
+def remember_headers(url, properties, headers, now):
properties.setdefault(url, {})
for h in ("Cache-Control", "ETag", "Expires", "Date", "Content-Length"):
if h in headers:
properties[url][h] = headers[h]
if "Date" not in headers:
- properties[url]["Date"] = my_formatdate(datetime.datetime.utcnow())
+ properties[url]["Date"] = my_formatdate(now)
-def changed(url, properties):
+def changed(url, properties, now):
req = requests.head(url, allow_redirects=True)
- remember_headers(url, properties, req.headers)
+ remember_headers(url, properties, req.headers, now)
if req.status_code != 200:
raise Exception("Got status %s" % req.status_code)
@@ -67,19 +68,22 @@ def changed(url, properties):
if "ETag" in pr and "ETag" in req.headers:
if pr["ETag"] == req.headers["ETag"]:
return False
+
return True
-def http_to_keep(api, project_uuid, url):
+def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
r = api.collections().list(filters=[["properties", "exists", url]]).execute()
+ now = utcnow()
+
for item in r["items"]:
properties = item["properties"]
- if fresh_cache(url, properties):
+ if fresh_cache(url, properties, now):
# Do nothing
cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
- if not changed(url, properties):
+ if not changed(url, properties, now):
# ETag didn't change, same content, just update headers
api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
@@ -91,18 +95,23 @@ def http_to_keep(api, project_uuid, url):
if req.status_code != 200:
raise Exception("Failed to download '%s' got status %s " % (url, req.status_code))
- remember_headers(url, properties, req.headers)
+ remember_headers(url, properties, req.headers, now)
- logger.info("Downloading %s (%s bytes)", url, properties[url]["Content-Length"])
+ if "Content-Length" in properties[url]:
+ cl = int(properties[url]["Content-Length"])
+ logger.info("Downloading %s (%s bytes)", url, cl)
+ else:
+ cl = None
+ logger.info("Downloading %s (unknown size)", url)
c = arvados.collection.Collection()
if req.headers.get("Content-Disposition"):
grp = re.search(r'filename=("((\"|[^"])+)"|([^][()<>@,;:\"/?={} ]+))', req.headers["Content-Disposition"])
- if grp.groups(2):
- name = grp.groups(2)
+ if grp.group(2):
+ name = grp.group(2)
else:
- name = grp.groups(3)
+ name = grp.group(4)
else:
name = urlparse.urlparse(url).path.split("/")[-1]
@@ -113,14 +122,17 @@ def http_to_keep(api, project_uuid, url):
for chunk in req.iter_content(chunk_size=1024):
count += len(chunk)
f.write(chunk)
- now = time.time()
- if (now - checkpoint) > 20:
- bps = (float(count)/float(now - start))
- logger.info("%2.1f%% complete, %3.2f MiB/s, %1.0f seconds left",
- float(count * 100) / float(properties[url]["Content-Length"]),
- bps/(1024*1024),
- (int(properties[url]["Content-Length"])-count)/bps)
- checkpoint = now
+ loopnow = time.time()
+ if (loopnow - checkpoint) > 20:
+ bps = (float(count)/float(loopnow - start))
+ if cl is not None:
+ logger.info("%2.1f%% complete, %3.2f MiB/s, %1.0f seconds left",
+ float(count * 100) / float(cl),
+ bps/(1024*1024),
+ (cl-count)/bps)
+ else:
+ logger.info("%d downloaded, %3.2f MiB/s", count, bps/(1024*1024))
+ checkpoint = loopnow
c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid, ensure_unique_name=True)
diff --git a/sdk/cwl/tests/test_http.py b/sdk/cwl/tests/test_http.py
new file mode 100644
index 0000000..0c66c39
--- /dev/null
+++ b/sdk/cwl/tests/test_http.py
@@ -0,0 +1,286 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import copy
+import cStringIO
+import functools
+import hashlib
+import json
+import logging
+import mock
+import sys
+import unittest
+import datetime
+
+import arvados
+import arvados.collection
+import arvados_cwl
+import arvados_cwl.runner
+import arvados.keep
+
+from .matcher import JsonDiffMatcher, StripYAMLComments
+from .mock_discovery import get_rootDesc
+
+import arvados_cwl.http
+
+import ruamel.yaml as yaml
+
+
+class TestHttpToKeep(unittest.TestCase):
+
+ @mock.patch("requests.get")
+ @mock.patch("arvados.collection.Collection")
+ def test_http_get(self, collectionmock, getmock):
+ api = mock.MagicMock()
+
+ api.collections().list().execute.return_value = {
+ "items": []
+ }
+
+ cm = mock.MagicMock()
+ cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
+ cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
+ collectionmock.return_value = cm
+
+ req = mock.MagicMock()
+ req.status_code = 200
+ req.headers = {}
+ req.iter_content.return_value = ["abc"]
+ getmock.return_value = req
+
+ utcnow = mock.MagicMock()
+ utcnow.return_value = datetime.datetime(2018, 5, 15)
+
+ r = arvados_cwl.http.http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
+ self.assertEqual(r, "keep:99999999999999999999999999999998+99/file1.txt")
+
+ getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
+
+ cm.open.assert_called_with("file1.txt", "w")
+ cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
+ owner_uuid=None, ensure_unique_name=True)
+
+ api.collections().update.assert_has_calls([
+ mock.call(uuid=cm.manifest_locator(),
+ body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
+ ])
+
+
+ @mock.patch("requests.get")
+ @mock.patch("arvados.collection.CollectionReader")
+ def test_http_expires(self, collectionmock, getmock):
+ api = mock.MagicMock()
+
+ api.collections().list().execute.return_value = {
+ "items": [{
+ "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
+ "portable_data_hash": "99999999999999999999999999999998+99",
+ "properties": {
+ 'http://example.com/file1.txt': {
+ 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
+ 'Expires': 'Tue, 17 May 2018 00:00:00 GMT'
+ }
+ }
+ }]
+ }
+
+ cm = mock.MagicMock()
+ cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
+ cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
+ cm.keys.return_value = ["file1.txt"]
+ collectionmock.return_value = cm
+
+ req = mock.MagicMock()
+ req.status_code = 200
+ req.headers = {}
+ req.iter_content.return_value = ["abc"]
+ getmock.return_value = req
+
+ utcnow = mock.MagicMock()
+ utcnow.return_value = datetime.datetime(2018, 5, 16)
+
+ r = arvados_cwl.http.http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
+ self.assertEqual(r, "keep:99999999999999999999999999999998+99/file1.txt")
+
+ getmock.assert_not_called()
+
+
+ @mock.patch("requests.get")
+ @mock.patch("arvados.collection.CollectionReader")
+ def test_http_cache_control(self, collectionmock, getmock):
+ api = mock.MagicMock()
+
+ api.collections().list().execute.return_value = {
+ "items": [{
+ "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
+ "portable_data_hash": "99999999999999999999999999999998+99",
+ "properties": {
+ 'http://example.com/file1.txt': {
+ 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
+ 'Cache-Control': 'max-age=172800'
+ }
+ }
+ }]
+ }
+
+ cm = mock.MagicMock()
+ cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
+ cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
+ cm.keys.return_value = ["file1.txt"]
+ collectionmock.return_value = cm
+
+ req = mock.MagicMock()
+ req.status_code = 200
+ req.headers = {}
+ req.iter_content.return_value = ["abc"]
+ getmock.return_value = req
+
+ utcnow = mock.MagicMock()
+ utcnow.return_value = datetime.datetime(2018, 5, 16)
+
+ r = arvados_cwl.http.http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
+ self.assertEqual(r, "keep:99999999999999999999999999999998+99/file1.txt")
+
+ getmock.assert_not_called()
+
+
+ @mock.patch("requests.get")
+ @mock.patch("requests.head")
+ @mock.patch("arvados.collection.Collection")
+ def test_http_expired(self, collectionmock, headmock, getmock):
+ api = mock.MagicMock()
+
+ api.collections().list().execute.return_value = {
+ "items": [{
+ "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
+ "portable_data_hash": "99999999999999999999999999999998+99",
+ "properties": {
+ 'http://example.com/file1.txt': {
+ 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
+ 'Expires': 'Tue, 16 May 2018 00:00:00 GMT'
+ }
+ }
+ }]
+ }
+
+ cm = mock.MagicMock()
+ cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz4"
+ cm.portable_data_hash.return_value = "99999999999999999999999999999997+99"
+ cm.keys.return_value = ["file1.txt"]
+ collectionmock.return_value = cm
+
+ req = mock.MagicMock()
+ req.status_code = 200
+ req.headers = {'Date': 'Tue, 17 May 2018 00:00:00 GMT'}
+ req.iter_content.return_value = ["def"]
+ getmock.return_value = req
+ headmock.return_value = req
+
+ utcnow = mock.MagicMock()
+ utcnow.return_value = datetime.datetime(2018, 5, 17)
+
+ r = arvados_cwl.http.http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
+ self.assertEqual(r, "keep:99999999999999999999999999999997+99/file1.txt")
+
+ getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
+
+ cm.open.assert_called_with("file1.txt", "w")
+ cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
+ owner_uuid=None, ensure_unique_name=True)
+
+ api.collections().update.assert_has_calls([
+ mock.call(uuid=cm.manifest_locator(),
+ body={"collection":{"properties": {'http://example.com/file1.txt': {'Date': 'Tue, 17 May 2018 00:00:00 GMT'}}}})
+ ])
+
+
+ @mock.patch("requests.get")
+ @mock.patch("requests.head")
+ @mock.patch("arvados.collection.CollectionReader")
+ def test_http_etag(self, collectionmock, headmock, getmock):
+ api = mock.MagicMock()
+
+ api.collections().list().execute.return_value = {
+ "items": [{
+ "uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz3",
+ "portable_data_hash": "99999999999999999999999999999998+99",
+ "properties": {
+ 'http://example.com/file1.txt': {
+ 'Date': 'Tue, 15 May 2018 00:00:00 GMT',
+ 'Expires': 'Tue, 16 May 2018 00:00:00 GMT',
+ 'ETag': '123456'
+ }
+ }
+ }]
+ }
+
+ cm = mock.MagicMock()
+ cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
+ cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
+ cm.keys.return_value = ["file1.txt"]
+ collectionmock.return_value = cm
+
+ req = mock.MagicMock()
+ req.status_code = 200
+ req.headers = {
+ 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
+ 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
+ 'ETag': '123456'
+ }
+ headmock.return_value = req
+
+ utcnow = mock.MagicMock()
+ utcnow.return_value = datetime.datetime(2018, 5, 17)
+
+ r = arvados_cwl.http.http_to_keep(api, None, "http://example.com/file1.txt", utcnow=utcnow)
+ self.assertEqual(r, "keep:99999999999999999999999999999998+99/file1.txt")
+
+ getmock.assert_not_called()
+ cm.open.assert_not_called()
+
+ api.collections().update.assert_has_calls([
+ mock.call(uuid=cm.manifest_locator(),
+ body={"collection":{"properties": {'http://example.com/file1.txt': {
+ 'Date': 'Tue, 17 May 2018 00:00:00 GMT',
+ 'Expires': 'Tue, 19 May 2018 00:00:00 GMT',
+ 'ETag': '123456'
+ }}}})
+ ])
+
+ @mock.patch("requests.get")
+ @mock.patch("arvados.collection.Collection")
+ def test_http_content_disp(self, collectionmock, getmock):
+ api = mock.MagicMock()
+
+ api.collections().list().execute.return_value = {
+ "items": []
+ }
+
+ cm = mock.MagicMock()
+ cm.manifest_locator.return_value = "zzzzz-4zz18-zzzzzzzzzzzzzz3"
+ cm.portable_data_hash.return_value = "99999999999999999999999999999998+99"
+ collectionmock.return_value = cm
+
+ req = mock.MagicMock()
+ req.status_code = 200
+ req.headers = {"Content-Disposition": "attachment; filename=file1.txt"}
+ req.iter_content.return_value = ["abc"]
+ getmock.return_value = req
+
+ utcnow = mock.MagicMock()
+ utcnow.return_value = datetime.datetime(2018, 5, 15)
+
+ r = arvados_cwl.http.http_to_keep(api, None, "http://example.com/download?fn=/file1.txt", utcnow=utcnow)
+ self.assertEqual(r, "keep:99999999999999999999999999999998+99/file1.txt")
+
+ getmock.assert_called_with("http://example.com/download?fn=/file1.txt", stream=True, allow_redirects=True)
+
+ cm.open.assert_called_with("file1.txt", "w")
+ cm.save_new.assert_called_with(name="Downloaded from http://example.com/download?fn=/file1.txt",
+ owner_uuid=None, ensure_unique_name=True)
+
+ api.collections().update.assert_has_calls([
+ mock.call(uuid=cm.manifest_locator(),
+ body={"collection":{"properties": {"http://example.com/download?fn=/file1.txt": {'Date': 'Tue, 15 May 2018 00:00:00 GMT'}}}})
+ ])
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list