Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>
from __future__ import absolute_import
from . import config
+import re
+
+def escape(path):
+    r"""Return ``path`` with backslash-octal sequences and spaces encoded.
+
+    Two substitutions are applied, in this order:
+
+    1. Every backslash that is immediately followed by three octal digits
+       in the range 000-377 has the backslash replaced by ``\134`` (the
+       octal code of a backslash); the three digits are left in place.
+    2. Every space character is replaced by ``\040``.
+
+    Backslashes that are not followed by such a three-digit code are
+    returned unchanged.
+    """
+    def _encode_backslash(match):
+        # Keep the captured digits, swap only the leading backslash.
+        return '\\134' + match.group(1)
+
+    protected = re.sub('\\\\([0-3][0-7][0-7])', _encode_backslash, path)
+    return protected.replace(' ', '\\040')
+
def normalize_stream(stream_name, stream):
"""Take manifest stream and return a list of tokens in normalized format.
"""
- stream_name = stream_name.replace(' ', '\\040')
+ stream_name = escape(stream_name)
stream_tokens = [stream_name]
sortedfiles = list(stream.keys())
sortedfiles.sort()
for streamfile in sortedfiles:
# Add in file segments
current_span = None
- fout = streamfile.replace(' ', '\\040')
+ fout = escape(streamfile)
for segment in stream[streamfile]:
# Collapse adjacent segments
streamoffset = blocks[segment.locator] + segment.segment_offset
from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
from .keep import KeepLocator, KeepClient
from .stream import StreamReader
-from ._normalize_stream import normalize_stream
+from ._normalize_stream import normalize_stream, escape
from ._ranges import Range, LocatorAndRange
from .safeapi import ThreadSafeApiCache
import arvados.config as config
def stream_name(self):
raise NotImplementedError()
+
@synchronized
def has_remote_blocks(self):
"""Recursively check for a +R segment locator signature."""
pathcomponents = path.split("/", 1)
if pathcomponents[0]:
- # Don't allow naming files/dirs \\056
- if pathcomponents[0] == "\\056":
- raise IOError(errno.EINVAL, "Invalid name", pathcomponents[0])
item = self._items.get(pathcomponents[0])
if len(pathcomponents) == 1:
if item is None:
def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
"""Encode empty directories by using an \056-named (".") empty file"""
if len(self._items) == 0:
- return "%s %s 0:0:\\056\n" % (stream_name, config.EMPTY_BLOCK_LOCATOR)
+ # The stream name is passed through escape() before it is written
+ # into the manifest line for the empty directory.
+ return "%s %s 0:0:\\056\n" % (
+ escape(stream_name), config.EMPTY_BLOCK_LOCATOR)
return super(Subcollection, self)._get_manifest_text(stream_name,
strip, normalize,
only_committed)
self.assertIs(c.find("./nonexistant.txt"), None)
self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
+ def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+     # The directory is literally named '\056 Test' and the file '\056'.
+     # In manifest text the backslash appears as \134 and the space as \040.
+     escaped_text = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+     coll = Collection(escaped_text)
+     # Serializing must hand back exactly the escaped text that was loaded.
+     self.assertEqual(coll.portable_manifest_text(), escaped_text)
+
+ def test_escaped_paths_do_get_unescaped_on_listing(self):
+     # The manifest stores the directory '\056 Test' and the file '\056'
+     # in escaped form (\134 for the backslash, \040 for the space).
+     coll = Collection('./\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n')
+     # Listings are expected to expose the decoded (literal) names.
+     dir_name = '\\056 Test'
+     self.assertIn(dir_name, coll.keys())
+     self.assertIn('\\056', coll[dir_name].keys())
+
+ def test_make_empty_dir_with_escaped_chars(self):
+     # The directory name contains a literal backslash followed by '056';
+     # the manifest is expected to show that backslash as \134.
+     coll = Collection()
+     coll.mkdirs('./Empty\\056Dir')
+     expected = './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n'
+     self.assertEqual(coll.portable_manifest_text(), expected)
+
+ def test_make_empty_dir_with_spaces(self):
+     # Spaces in every path component are expected to appear as \040
+     # in the manifest text.
+     coll = Collection()
+     coll.mkdirs('./foo bar/baz waz')
+     expected = './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n'
+     self.assertEqual(coll.portable_manifest_text(), expected)
+
def test_remove_in_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c.remove("foo/count2.txt")
self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
- def test_create_dot_file(self):
- c = Collection()
- with self.assertRaises(IOError):
- with c.open("./dir/\\056", "wb") as f:
- f.write("Should not be allowed")
-
- def test_create_file_inside_dot_dir(self):
- c = Collection()
- with self.assertRaises(IOError):
- with c.open("./dir/\\056/foo", "wb") as f:
- f.write("Should not be allowed")
-
def test_remove_empty_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
c.remove("foo/count2.txt")