[ARVADOS] updated: 1.1.4-306-gc0052d1
Git user
git at public.curoverse.com
Wed May 23 15:55:05 EDT 2018
Summary of changes:
sdk/cwl/arvados_cwl/http.py | 26 ++++++++++++++++++++------
1 file changed, 20 insertions(+), 6 deletions(-)
via c0052d1e0f1d395e1cdb357ceaae640954f688a5 (commit)
from 26744a79440c6b5b0e519b4964a5f06fb2ad1c74 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit c0052d1e0f1d395e1cdb357ceaae640954f688a5
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date: Wed May 23 15:23:44 2018 -0400
11162: Smarter http downloads.
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index ab59ad3..ea77786 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -14,7 +14,11 @@ def my_formatdate(dt):
return email.utils.formatdate(timeval=time.mktime(now.timetuple()), localtime=False, usegmt=True)
def my_parsedate(text):
- return datetime.datetime(*email.utils.parsedate(text)[:6])
+ parsed = email.utils.parsedate(text)
+ if parsed:
+ return datetime.datetime(*parsed[:6])
+ else:
+ datetime.datetime(1970, 1, 1)
def fresh_cache(url, properties):
pr = properties[url]
@@ -53,7 +57,7 @@ def remember_headers(url, properties, headers):
def changed(url, properties):
- req = requests.head(url)
+ req = requests.head(url, allow_redirects=True)
remember_headers(url, properties, req.headers)
if req.status_code != 200:
@@ -67,21 +71,22 @@ def changed(url, properties):
def http_to_keep(api, project_uuid, url):
r = api.collections().list(filters=[["properties", "exists", url]]).execute()
- name = urlparse.urlparse(url).path.split("/")[-1]
for item in r["items"]:
properties = item["properties"]
if fresh_cache(url, properties):
# Do nothing
- return "keep:%s/%s" % (item["portable_data_hash"], name)
+ cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+ return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
if not changed(url, properties):
# ETag didn't change, same content, just update headers
api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
- return "keep:%s/%s" % (item["portable_data_hash"], name)
+ cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+ return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
properties = {}
- req = requests.get(url, stream=True)
+ req = requests.get(url, stream=True, allow_redirects=True)
if req.status_code != 200:
raise Exception("Failed to download '%s' got status %s " % (req.status_code, url))
@@ -92,6 +97,15 @@ def http_to_keep(api, project_uuid, url):
c = arvados.collection.Collection()
+ if req.headers.get("Content-Disposition"):
+ grp = re.search(r'filename=("((\"|[^"])+)"|([^][()<>@,;:\"/?={} ]+))', req.headers["Content-Disposition"])
+ if grp.groups(2):
+ name = grp.groups(2)
+ else:
+ name = grp.groups(3)
+ else:
+ name = urlparse.urlparse(url).path.split("/")[-1]
+
count = 0
start = time.time()
checkpoint = start
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list