projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge branch '18797-retry-docstrings'.
[arvados.git]
/
sdk
/
python
/
arvados
/
_normalize_stream.py
diff --git
a/sdk/python/arvados/_normalize_stream.py
b/sdk/python/arvados/_normalize_stream.py
index 47b66c82da000d840bdb7221575019cf6396981e..485c757e7fce34dda579185608f39bfe4911bd94 100644
(file)
--- a/
sdk/python/arvados/_normalize_stream.py
+++ b/
sdk/python/arvados/_normalize_stream.py
@@
-5,6
+5,13
@@
from __future__ import absolute_import
from . import config
from __future__ import absolute_import
from . import config
+import re
+
def escape(path):
    """Escape a stream or file name for use as a manifest token.

    Every backslash, colon, and character from NUL through space
    (code points 0 to 32) is replaced by a backslash followed by the
    character's three-digit octal code; for example a space becomes
    ``\\040`` and a backslash becomes ``\\134``.  All other characters
    are passed through unchanged.

    :param path: the text to escape.
    :return: the escaped text.
    """
    # A single pass handles the backslash together with the other
    # characters, so the backslashes emitted by the replacement are
    # never themselves re-escaped.
    return re.sub(
        r'[\\:\000-\040]',
        lambda m: "\\%03o" % ord(m.group(0)),
        path,
    )
+
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
@@
-16,7
+23,7
@@
def normalize_stream(stream_name, stream):
"""
"""
- stream_name =
stream_name.replace(' ', '\\040'
)
+ stream_name =
escape(stream_name
)
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
@@
-38,7
+45,7
@@
def normalize_stream(stream_name, stream):
for streamfile in sortedfiles:
# Add in file segments
current_span = None
for streamfile in sortedfiles:
# Add in file segments
current_span = None
- fout =
streamfile.replace(' ', '\\040'
)
+ fout =
escape(streamfile
)
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset
@@
-48,13
+55,13
@@
def normalize_stream(stream_name, stream):
if streamoffset == current_span[1]:
current_span[1] += segment.segment_size
else:
if streamoffset == current_span[1]:
current_span[1] += segment.segment_size
else:
- stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+ stream_tokens.append(
u
"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
current_span = [streamoffset, streamoffset + segment.segment_size]
if current_span is not None:
current_span = [streamoffset, streamoffset + segment.segment_size]
if current_span is not None:
- stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+ stream_tokens.append(
u
"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
if not stream[streamfile]:
if not stream[streamfile]:
- stream_tokens.append("0:0:{0}".format(fout))
+ stream_tokens.append(
u
"0:0:{0}".format(fout))
return stream_tokens
return stream_tokens