[arvados] updated: 2.1.0-3011-g60cd74f05

git repository hosting git at public.arvados.org
Mon Nov 7 21:52:07 UTC 2022


Summary of changes:
 sdk/cwl/arvados_cwl/http.py       | 24 +++++++++++++++++++++---
 sdk/cwl/arvados_cwl/pathmapper.py |  3 +++
 2 files changed, 24 insertions(+), 3 deletions(-)

       via  60cd74f05a391d0e6eba3e6b1896fd88c0dd53c2 (commit)
       via  64c887b25ab57626f468f7458d7e21c9701d3eb8 (commit)
      from  08ac4baf572e0215f49f71c30defd86043664a98 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.


commit 60cd74f05a391d0e6eba3e6b1896fd88c0dd53c2
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Nov 7 16:46:37 2022 -0500

    19699: Report download done, don't try to stage deferred downloads
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index 1826e13c3..d85ae30d2 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -166,6 +166,7 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
                     logger.info("%d downloaded, %3.2f MiB/s", count, (bps / (1024*1024)))
                 checkpoint = loopnow
 
+    logger.info("Download complete")
 
     collectionname = "Downloaded from %s" % urllib.parse.quote(url, safe='')
 
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 89364a905..a7f210347 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -160,6 +160,9 @@ class ArvPathMapper(PathMapper):
         if loc.startswith("_:"):
             return True
 
+        if self.arvrunner.defer_downloads and (loc.startswith("http:") or loc.startswith("https:")):
+            return False
+
         i = loc.rfind("/")
         if i > -1:
             loc_prefix = loc[:i+1]

commit 64c887b25ab57626f468f7458d7e21c9701d3eb8
Author: Peter Amstutz <peter.amstutz at curii.com>
Date:   Mon Nov 7 16:24:25 2022 -0500

    19699: Accommodate failed HEAD requests, add If-None-Match
    
    Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>

diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index b061f44f9..1826e13c3 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -77,7 +77,9 @@ def changed(url, properties, now):
     remember_headers(url, properties, req.headers, now)
 
     if req.status_code != 200:
-        raise Exception("Got status %s" % req.status_code)
+        # Sometimes endpoints are misconfigured and will deny HEAD but
+        # allow GET so instead of failing here, we'll try GET If-None-Match
+        return True
 
     pr = properties[url]
     if "ETag" in pr and "ETag" in req.headers:
@@ -91,6 +93,8 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
 
     now = utcnow()
 
+    etags = {}
+
     for item in r["items"]:
         properties = item["properties"]
         if fresh_cache(url, properties, now):
@@ -104,14 +108,27 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
             cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
             return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
 
+        if "ETag" in properties:
+            etags[properties["ETag"]] = item
+
     properties = {}
-    req = requests.get(url, stream=True, allow_redirects=True)
+    headers = {}
+    if etags:
+        headers['If-None-Match'] = ', '.join(['"%s"' % k for k,v in etags.items()])
+    req = requests.get(url, stream=True, allow_redirects=True, headers=headers)
 
-    if req.status_code != 200:
+    if req.status_code not in (200, 304):
         raise Exception("Failed to download '%s' got status %s " % (url, req.status_code))
 
     remember_headers(url, properties, req.headers, now)
 
+    if req.status_code == 304 and "ETag" in req.headers and req.headers["ETag"] in etags:
+        item = etags[req.headers["ETag"]]
+        item["properties"].update(properties)
+        api.collections().update(uuid=item["uuid"], body={"collection":{"properties": item["properties"]}}).execute()
+        cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+        return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
+
     if "Content-Length" in properties[url]:
         cl = int(properties[url]["Content-Length"])
         logger.info("Downloading %s (%s bytes)", url, cl)

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list