# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0
from __future__ import division

from future import standard_library
standard_library.install_aliases()

import calendar
import datetime
import email.utils
import logging
import re
import time
import urllib.parse

import requests

import arvados
import arvados.collection
logger = logging.getLogger('arvados.cwl-runner')
def my_formatdate(dt):
    """Render the naive-UTC datetime *dt* as an RFC 2822 date string ("... GMT")."""
    # calendar.timegm() interprets the struct_time as UTC (unlike time.mktime,
    # which assumes local time), so naive-UTC datetimes round-trip correctly.
    epoch_seconds = calendar.timegm(dt.timetuple())
    return email.utils.formatdate(timeval=epoch_seconds, localtime=False, usegmt=True)
def my_parsedate(text):
    """Parse an RFC 2822 date string into a naive datetime normalized to UTC.

    Falls back to the epoch (1970-01-01 00:00:00) when *text* is unparseable,
    so callers always receive a comparable datetime.
    """
    parsed = email.utils.parsedate_tz(text)
    if parsed:
        if parsed[9]:
            # parsed[9] is the timezone's offset from UTC in seconds, so
            # UTC = local time - offset (e.g. "12:45:26 -0600" is 18:45:26 UTC;
            # email.utils.mktime_tz applies the same subtraction). The previous
            # code added the offset, inverting the adjustment.
            return datetime.datetime(*parsed[:6]) - datetime.timedelta(seconds=parsed[9])
        else:
            # TZ is zero or missing, assume UTC.
            return datetime.datetime(*parsed[:6])
    else:
        return datetime.datetime(1970, 1, 1)
if fresh_cache(url, properties, now):
# Do nothing
cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
- return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
+ return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
if not changed(url, properties, now):
# ETag didn't change, same content, just update headers
api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
- return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
+ return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
properties = {}
req = requests.get(url, stream=True, allow_redirects=True)
else:
name = grp.group(4)
else:
- name = urlparse.urlparse(url).path.split("/")[-1]
+ name = urllib.parse.urlparse(url).path.split("/")[-1]
count = 0
start = time.time()
checkpoint = start
- with c.open(name, "w") as f:
+ with c.open(name, "wb") as f:
for chunk in req.iter_content(chunk_size=1024):
count += len(chunk)
f.write(chunk)
loopnow = time.time()
if (loopnow - checkpoint) > 20:
- bps = (float(count)/float(loopnow - start))
+ bps = count / (loopnow - start)
if cl is not None:
logger.info("%2.1f%% complete, %3.2f MiB/s, %1.0f seconds left",
- float(count * 100) / float(cl),
- bps/(1024*1024),
- (cl-count)/bps)
+ ((count * 100) / cl),
+ (bps // (1024*1024)),
+ ((cl-count) // bps))
else:
- logger.info("%d downloaded, %3.2f MiB/s", count, bps/(1024*1024))
+ logger.info("%d downloaded, %3.2f MiB/s", count, (bps / (1024*1024)))
checkpoint = loopnow
c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid, ensure_unique_name=True)