From 34aa58ae5b28ddf0ef5f37c70e761c763bf6a431 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 7 Nov 2022 16:24:25 -0500 Subject: [PATCH] 19699: Accommodate failed HEAD requests, add If-None-Match Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- sdk/cwl/arvados_cwl/http.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/sdk/cwl/arvados_cwl/http.py b/sdk/cwl/arvados_cwl/http.py index b061f44f96..1826e13c39 100644 --- a/sdk/cwl/arvados_cwl/http.py +++ b/sdk/cwl/arvados_cwl/http.py @@ -77,7 +77,9 @@ def changed(url, properties, now): remember_headers(url, properties, req.headers, now) if req.status_code != 200: - raise Exception("Got status %s" % req.status_code) + # Sometimes endpoints are misconfigured and will deny HEAD but + # allow GET so instead of failing here, we'll try GET If-None-Match + return True pr = properties[url] if "ETag" in pr and "ETag" in req.headers: @@ -91,6 +93,8 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow): now = utcnow() + etags = {} + for item in r["items"]: properties = item["properties"] if fresh_cache(url, properties, now): @@ -104,14 +108,27 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow): cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api) return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0]) + if "ETag" in properties: + etags[properties["ETag"]] = item + properties = {} - req = requests.get(url, stream=True, allow_redirects=True) + headers = {} + if etags: + headers['If-None-Match'] = ', '.join(['"%s"' % k for k,v in etags.items()]) + req = requests.get(url, stream=True, allow_redirects=True, headers=headers) - if req.status_code != 200: + if req.status_code not in (200, 304): raise Exception("Failed to download '%s' got status %s " % (url, req.status_code)) remember_headers(url, properties, req.headers, now) + if req.status_code == 304 and "ETag" in req.headers and 
req.headers["ETag"] in etags: + item = etags[req.headers["ETag"]] + item["properties"].update(properties) + api.collections().update(uuid=item["uuid"], body={"collection":{"properties": item["properties"]}}).execute() + cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api) + return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0]) + if "Content-Length" in properties[url]: cl = int(properties[url]["Content-Length"]) logger.info("Downloading %s (%s bytes)", url, cl) -- 2.30.2