[ARVADOS] updated: 1.1.4-306-gc0052d1

Git user git at public.curoverse.com
Wed May 23 15:55:05 EDT 2018


Summary of changes:
 sdk/cwl/arvados_cwl/http.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

       via  c0052d1e0f1d395e1cdb357ceaae640954f688a5 (commit)
      from  26744a79440c6b5b0e519b4964a5f06fb2ad1c74 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit c0052d1e0f1d395e1cdb357ceaae640954f688a5
Author: Peter Amstutz <pamstutz at veritasgenetics.com>
Date:   Wed May 23 15:23:44 2018 -0400

    11162: Smarter http downloads.
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz at veritasgenetics.com>

diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index ab59ad3..ea77786 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -14,7 +14,11 @@ def my_formatdate(dt):
     return email.utils.formatdate(timeval=time.mktime(now.timetuple()), localtime=False, usegmt=True)
 
 def my_parsedate(text):
-    return datetime.datetime(*email.utils.parsedate(text)[:6])
+    parsed = email.utils.parsedate(text)
+    if parsed:
+        return datetime.datetime(*parsed[:6])
+    else:
+        datetime.datetime(1970, 1, 1)
 
 def fresh_cache(url, properties):
     pr = properties[url]
@@ -53,7 +57,7 @@ def remember_headers(url, properties, headers):
 
 
 def changed(url, properties):
-    req = requests.head(url)
+    req = requests.head(url, allow_redirects=True)
     remember_headers(url, properties, req.headers)
 
     if req.status_code != 200:
@@ -67,21 +71,22 @@ def changed(url, properties):
 
 def http_to_keep(api, project_uuid, url):
     r = api.collections().list(filters=[["properties", "exists", url]]).execute()
-    name = urlparse.urlparse(url).path.split("/")[-1]
 
     for item in r["items"]:
         properties = item["properties"]
         if fresh_cache(url, properties):
             # Do nothing
-            return "keep:%s/%s" % (item["portable_data_hash"], name)
+            cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+            return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
 
         if not changed(url, properties):
             # ETag didn't change, same content, just update headers
             api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
-            return "keep:%s/%s" % (item["portable_data_hash"], name)
+            cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+            return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
 
     properties = {}
-    req = requests.get(url, stream=True)
+    req = requests.get(url, stream=True, allow_redirects=True)
 
     if req.status_code != 200:
         raise Exception("Failed to download '%s' got status %s " % (req.status_code, url))
@@ -92,6 +97,15 @@ def http_to_keep(api, project_uuid, url):
 
     c = arvados.collection.Collection()
 
+    if req.headers.get("Content-Disposition"):
+        grp = re.search(r'filename=("((\"|[^"])+)"|([^][()<>@,;:\"/?={} ]+))', req.headers["Content-Disposition"])
+        if grp.groups(2):
+            name = grp.groups(2)
+        else:
+            name = grp.groups(3)
+    else:
+        name = urlparse.urlparse(url).path.split("/")[-1]
+
     count = 0
     start = time.time()
     checkpoint = start

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list