19699: Tighten up etag handling a bit
author Peter Amstutz <peter.amstutz@curii.com>
Mon, 7 Nov 2022 21:59:05 +0000 (16:59 -0500)
committer Peter Amstutz <peter.amstutz@curii.com>
Mon, 14 Nov 2022 16:27:45 +0000 (11:27 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/cwl/arvados_cwl/http.py

index d85ae30d2d0be9fd66058161ce8264d0273eadba..33aa098845f4f45561f2768b14e3cd0c17ae9131 100644 (file)
@@ -88,6 +88,14 @@ def changed(url, properties, now):
 
     return True
 
 
     return True
 
+def etag_quote(etag):
+    # if it already has leading and trailing quotes, do nothing
+    if etag[0] == '"' and etag[-1] == '"':
+        return etag
+    else:
+        # Add quotes.
+        return '"' + etag + '"'
+
 def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
     r = api.collections().list(filters=[["properties", "exists", url]]).execute()
 
 def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
     r = api.collections().list(filters=[["properties", "exists", url]]).execute()
 
@@ -108,13 +116,13 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
             cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
             return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
 
             cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
             return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
 
-        if "ETag" in properties:
+        if "ETag" in properties and len(properties["ETag"]) > 2:
             etags[properties["ETag"]] = item
 
     properties = {}
     headers = {}
     if etags:
             etags[properties["ETag"]] = item
 
     properties = {}
     headers = {}
     if etags:
-        headers['If-None-Match'] = ', '.join(['"%s"' % k for k,v in etags.items()])
+        headers['If-None-Match'] = ', '.join([etag_quote(k) for k,v in etags.items()])
     req = requests.get(url, stream=True, allow_redirects=True, headers=headers)
 
     if req.status_code not in (200, 304):
     req = requests.get(url, stream=True, allow_redirects=True, headers=headers)
 
     if req.status_code not in (200, 304):