[arvados] updated: 2.1.0-3011-g60cd74f05
git repository hosting
git at public.arvados.org
Mon Nov 7 21:52:07 UTC 2022
Summary of changes:
sdk/cwl/arvados_cwl/http.py | 24 +++++++++++++++++++++---
sdk/cwl/arvados_cwl/pathmapper.py | 3 +++
2 files changed, 24 insertions(+), 3 deletions(-)
via 60cd74f05a391d0e6eba3e6b1896fd88c0dd53c2 (commit)
via 64c887b25ab57626f468f7458d7e21c9701d3eb8 (commit)
from 08ac4baf572e0215f49f71c30defd86043664a98 (commit)
Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.
commit 60cd74f05a391d0e6eba3e6b1896fd88c0dd53c2
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Nov 7 16:46:37 2022 -0500
19699: Report download done, don't try to stage deferred downloads
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index 1826e13c3..d85ae30d2 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -166,6 +166,7 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
logger.info("%d downloaded, %3.2f MiB/s", count, (bps / (1024*1024)))
checkpoint = loopnow
+ logger.info("Download complete")
collectionname = "Downloaded from %s" % urllib.parse.quote(url, safe='')
diff --git a/sdk/cwl/arvados_cwl/pathmapper.py b/sdk/cwl/arvados_cwl/pathmapper.py
index 89364a905..a7f210347 100644
--- a/sdk/cwl/arvados_cwl/pathmapper.py
+++ b/sdk/cwl/arvados_cwl/pathmapper.py
@@ -160,6 +160,9 @@ class ArvPathMapper(PathMapper):
if loc.startswith("_:"):
return True
+ if self.arvrunner.defer_downloads and (loc.startswith("http:") or loc.startswith("https:")):
+ return False
+
i = loc.rfind("/")
if i > -1:
loc_prefix = loc[:i+1]
commit 64c887b25ab57626f468f7458d7e21c9701d3eb8
Author: Peter Amstutz <peter.amstutz at curii.com>
Date: Mon Nov 7 16:24:25 2022 -0500
19699: Accommodate failed HEAD requests, add If-None-Match
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz at curii.com>
diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py
index b061f44f9..1826e13c3 100644
--- a/sdk/cwl/arvados_cwl/http.py
+++ b/sdk/cwl/arvados_cwl/http.py
@@ -77,7 +77,9 @@ def changed(url, properties, now):
remember_headers(url, properties, req.headers, now)
if req.status_code != 200:
- raise Exception("Got status %s" % req.status_code)
+ # Sometimes endpoints are misconfigured and will deny HEAD but
+ # allow GET so instead of failing here, we'll try GET If-None-Match
+ return True
pr = properties[url]
if "ETag" in pr and "ETag" in req.headers:
@@ -91,6 +93,8 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
now = utcnow()
+ etags = {}
+
for item in r["items"]:
properties = item["properties"]
if fresh_cache(url, properties, now):
@@ -104,14 +108,27 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
+ if "ETag" in properties:
+ etags[properties["ETag"]] = item
+
properties = {}
- req = requests.get(url, stream=True, allow_redirects=True)
+ headers = {}
+ if etags:
+ headers['If-None-Match'] = ', '.join(['"%s"' % k for k,v in etags.items()])
+ req = requests.get(url, stream=True, allow_redirects=True, headers=headers)
- if req.status_code != 200:
+ if req.status_code not in (200, 304):
raise Exception("Failed to download '%s' got status %s " % (url, req.status_code))
remember_headers(url, properties, req.headers, now)
+ if req.status_code == 304 and "ETag" in req.headers and req.headers["ETag"] in etags:
+ item = etags[req.headers["ETag"]]
+ item["properties"].update(properties)
+ api.collections().update(uuid=item["uuid"], body={"collection":{"properties": item["properties"]}}).execute()
+ cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+ return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
+
if "Content-Length" in properties[url]:
cl = int(properties[url]["Content-Length"])
logger.info("Downloading %s (%s bytes)", url, cl)
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list