19699: Accommodate failed HEAD requests, add If-None-Match
author: Peter Amstutz <peter.amstutz@curii.com>
Mon, 7 Nov 2022 21:24:25 +0000 (16:24 -0500)
committer: Peter Amstutz <peter.amstutz@curii.com>
Mon, 14 Nov 2022 16:27:45 +0000 (11:27 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/cwl/arvados_cwl/http.py

index b061f44f9655819f2dce972304a9de44419cd3b1..1826e13c3917a606bf3c0bba20c2ed5d5e898ad2 100644 (file)
@@ -77,7 +77,9 @@ def changed(url, properties, now):
     remember_headers(url, properties, req.headers, now)
 
     if req.status_code != 200:
-        raise Exception("Got status %s" % req.status_code)
+        # Sometimes endpoints are misconfigured and will deny HEAD but
+        # allow GET so instead of failing here, we'll try GET If-None-Match
+        return True
 
     pr = properties[url]
     if "ETag" in pr and "ETag" in req.headers:
@@ -91,6 +93,8 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
 
     now = utcnow()
 
+    etags = {}
+
     for item in r["items"]:
         properties = item["properties"]
         if fresh_cache(url, properties, now):
@@ -104,14 +108,27 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
             cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
             return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
 
+        if "ETag" in properties:
+            etags[properties["ETag"]] = item
+
     properties = {}
-    req = requests.get(url, stream=True, allow_redirects=True)
+    headers = {}
+    if etags:
+        headers['If-None-Match'] = ', '.join(['"%s"' % k for k,v in etags.items()])
+    req = requests.get(url, stream=True, allow_redirects=True, headers=headers)
 
-    if req.status_code != 200:
+    if req.status_code not in (200, 304):
         raise Exception("Failed to download '%s' got status %s " % (url, req.status_code))
 
     remember_headers(url, properties, req.headers, now)
 
+    if req.status_code == 304 and "ETag" in req.headers and req.headers["ETag"] in etags:
+        item = etags[req.headers["ETag"]]
+        item["properties"].update(properties)
+        api.collections().update(uuid=item["uuid"], body={"collection":{"properties": item["properties"]}}).execute()
+        cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
+        return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
+
     if "Content-Length" in properties[url]:
         cl = int(properties[url]["Content-Length"])
         logger.info("Downloading %s (%s bytes)", url, cl)