14325: Merge branch 'master'
authorTom Clegg <tclegg@veritasgenetics.com>
Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
committerTom Clegg <tclegg@veritasgenetics.com>
Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

15 files changed:
apps/workbench/app/assets/javascripts/components/edit_tags.js
apps/workbench/app/controllers/work_units_controller.rb
apps/workbench/app/models/collection.rb
build/run-tests.sh
lib/controller/handler_test.go
sdk/python/arvados/_normalize_stream.py
sdk/python/arvados/collection.py
sdk/python/tests/test_collections.py
sdk/ruby/lib/arvados/keep.rb
sdk/ruby/test/test_keep_manifest.rb
services/api/app/controllers/user_sessions_controller.rb
services/api/app/models/api_client_authorization.rb
services/api/test/functional/user_sessions_controller_test.rb
services/api/test/integration/user_sessions_test.rb
services/fuse/tests/test_mount.py

index ac4d2df7b235f57851c80dae768d1da7fda3182f..1fddb2651ef96a2cbec2e5dff1da030a0f33c3eb 100644 (file)
@@ -4,7 +4,7 @@
 
 window.SimpleInput = {
     view: function(vnode) {
-        return m("input.form-control", {
+        return m('input.form-control', {
             style: {
                 width: '100%',
             },
@@ -22,7 +22,7 @@ window.SimpleInput = {
 
 window.SelectOrAutocomplete = {
     view: function(vnode) {
-        return m("input.form-control", {
+        return m('input.form-control', {
             style: {
                 width: '100%'
             },
@@ -87,9 +87,9 @@ window.TagEditorRow = {
                     valueOpts = vnode.attrs.vocabulary().tags[vnode.attrs.name()].values
             }
         }
-        return m("tr", [
+        return m('tr', [
             // Erase tag
-            m("td", [
+            m('td', [
                 vnode.attrs.editMode &&
                 m('div.text-center', m('a.btn.btn-default.btn-sm', {
                     style: {
@@ -99,13 +99,13 @@ window.TagEditorRow = {
                 }, m('i.fa.fa-fw.fa-trash-o')))
             ]),
             // Tag key
-            m("td", [
+            m('td', [
                 vnode.attrs.editMode ?
-                m("div", {key: 'key'}, [
+                m('div', {key: 'key'}, [
                     m(inputComponent, {
                         options: nameOpts,
                         value: vnode.attrs.name,
-                        // Allow any tag name unless "strict" is set to true.
+                        // Allow any tag name unless 'strict' is set to true.
                         create: !vnode.attrs.vocabulary().strict,
                         placeholder: 'key',
                     })
@@ -113,9 +113,9 @@ window.TagEditorRow = {
                 : vnode.attrs.name
             ]),
             // Tag value
-            m("td", [
+            m('td', [
                 vnode.attrs.editMode ?
-                m("div", {key: 'value'}, [
+                m('div', {key: 'value'}, [
                     m(inputComponent, {
                         options: valueOpts,
                         value: vnode.attrs.value,
@@ -137,20 +137,20 @@ window.TagEditorRow = {
 
 window.TagEditorTable = {
     view: function(vnode) {
-        return m("table.table.table-condensed.table-justforlayout", [
-            m("colgroup", [
-                m("col", {width:"5%"}),
-                m("col", {width:"25%"}),
-                m("col", {width:"70%"}),
+        return m('table.table.table-condensed.table-justforlayout', [
+            m('colgroup', [
+                m('col', {width:'5%'}),
+                m('col', {width:'25%'}),
+                m('col', {width:'70%'}),
             ]),
-            m("thead", [
-                m("tr", [
-                    m("th"),
-                    m("th", "Key"),
-                    m("th", "Value"),
+            m('thead', [
+                m('tr', [
+                    m('th'),
+                    m('th', 'Key'),
+                    m('th', 'Value'),
                 ])
             ]),
-            m("tbody", [
+            m('tbody', [
                 vnode.attrs.tags.length > 0
                 ? vnode.attrs.tags.map(function(tag, idx) {
                     return m(TagEditorRow, {
@@ -165,7 +165,7 @@ window.TagEditorTable = {
                         vocabulary: vnode.attrs.vocabulary
                     })
                 })
-                : m("tr", m("td[colspan=3]", m("center", "Loading tags...")))
+                : m('tr', m('td[colspan=3]', m('center', 'Loading tags...')))
             ]),
         ])
     }
@@ -185,18 +185,18 @@ window.TagEditorApp = {
     oninit: function(vnode) {
         vnode.state.sessionDB = new SessionDB()
         // Get vocabulary
-        vnode.state.vocabulary = m.stream({"strict":false, "tags":{}})
+        vnode.state.vocabulary = m.stream({'strict':false, 'tags':{}})
         var vocabularyTimestamp = parseInt(Date.now() / 300000) // Bust cache every 5 minutes
         m.request('/vocabulary.json?v=' + vocabularyTimestamp).then(vnode.state.vocabulary)
         vnode.state.editMode = vnode.attrs.targetEditable
         vnode.state.tags = []
         vnode.state.dirty = m.stream(false)
         vnode.state.dirty.map(m.redraw)
-        vnode.state.objPath = '/arvados/v1/'+vnode.attrs.targetController+'/'+vnode.attrs.targetUuid
+        vnode.state.objPath = 'arvados/v1/' + vnode.attrs.targetController + '/' + vnode.attrs.targetUuid
         // Get tags
         vnode.state.sessionDB.request(
             vnode.state.sessionDB.loadLocal(),
-            '/arvados/v1/'+vnode.attrs.targetController,
+            'arvados/v1/' + vnode.attrs.targetController,
             {
                 data: {
                     filters: JSON.stringify([['uuid', '=', vnode.attrs.targetUuid]]),
@@ -228,8 +228,8 @@ window.TagEditorApp = {
     view: function(vnode) {
         return [
             vnode.state.editMode &&
-            m("div.pull-left", [
-                m("a.btn.btn-primary.btn-sm"+(vnode.state.dirty() ? '' : '.disabled'), {
+            m('div.pull-left', [
+                m('a.btn.btn-primary.btn-sm' + (vnode.state.dirty() ? '' : '.disabled'), {
                     style: {
                         margin: '10px 0px'
                     },
@@ -244,7 +244,7 @@ window.TagEditorApp = {
                         vnode.state.sessionDB.request(
                             vnode.state.sessionDB.loadLocal(),
                             vnode.state.objPath, {
-                                method: "PUT",
+                                method: 'PUT',
                                 data: {properties: JSON.stringify(tags)}
                             }
                         ).then(function(v) {
index 767762c81e3cd3d899bda0b3bce873cc97c390b9..d3ded867c198f5c265fafb7b49a89d50e1515fc9 100644 (file)
@@ -126,7 +126,7 @@ class WorkUnitsController < ApplicationController
                           "--local",
                           "--api=containers",
                           "--project-uuid=#{params['work_unit']['owner_uuid']}",
-                          "--collection-keep-cache=#{keep_cache}",
+                          "--collection-cache-size=#{keep_cache}",
                           "/var/lib/cwl/workflow.json#main",
                           "/var/lib/cwl/cwl.input.json"]
 
index 09af60fb9cc5f18a9102d02ed8133e6651f5ecc9..f5aef841ea47a6bfc66e84bccbaa86c617267a3f 100644 (file)
@@ -68,8 +68,8 @@ class Collection < ArvadosBase
         .sort.flat_map do |parts|
         [parts + [nil]] + dir_to_tree.call(File.join(parts))
       end
-      # Then extend that list with files in this directory.
-      subnodes + tree[File.split(dirname)]
+      # Then extend that list with files in this directory, except the empty dir placeholders (0:0:. files).
+      subnodes + tree[File.split(dirname)].reject { |_, basename, size| (basename == '.') and (size == 0) }
     end
     dir_to_tree.call('.')
   end
index 0a26e851365237234262bb44c549edf5116735c7..749075d81576242a29db0d1b075414dc5f1d0270 100755 (executable)
@@ -37,7 +37,7 @@ CONFIGSRC=path Dir with api server config files to copy into source tree.
                (If none given, leave config files alone in source tree.)
 services/api_test="TEST=test/functional/arvados/v1/collections_controller_test.rb"
                Restrict apiserver tests to the given file
-sdk/python_test="--test-suite test.test_keep_locator"
+sdk/python_test="--test-suite tests.test_keep_locator"
                Restrict Python SDK tests to the given class
 apps/workbench_test="TEST=test/integration/pipeline_instances_test.rb"
                Restrict Workbench tests to the given file
index 746b9242f2198ee3c3000c808771047d4aa1c77c..f11228a31350b93f2da70a7b5ab46b8926a47b06 100644 (file)
@@ -128,7 +128,7 @@ func (s *HandlerSuite) TestProxyRedirect(c *check.C) {
        resp := httptest.NewRecorder()
        s.handler.ServeHTTP(resp, req)
        c.Check(resp.Code, check.Equals, http.StatusFound)
-       c.Check(resp.Header().Get("Location"), check.Matches, `https://0.0.0.0:1/auth/joshid\?return_to=foo&?`)
+       c.Check(resp.Header().Get("Location"), check.Matches, `https://0.0.0.0:1/auth/joshid\?return_to=%2Cfoo&?`)
 }
 
 func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
index 47b66c82da000d840bdb7221575019cf6396981e..b579d41ed2839b73fa72c6486101abe97e46a0cc 100644 (file)
@@ -5,6 +5,13 @@
 from __future__ import absolute_import
 from . import config
 
+import re
+
+def escape(path):
+    path = re.sub('\\\\', lambda m: '\\134', path)
+    path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)
+    return path
+
 def normalize_stream(stream_name, stream):
     """Take manifest stream and return a list of tokens in normalized format.
 
@@ -16,7 +23,7 @@ def normalize_stream(stream_name, stream):
 
     """
 
-    stream_name = stream_name.replace(' ', '\\040')
+    stream_name = escape(stream_name)
     stream_tokens = [stream_name]
     sortedfiles = list(stream.keys())
     sortedfiles.sort()
@@ -38,7 +45,7 @@ def normalize_stream(stream_name, stream):
     for streamfile in sortedfiles:
         # Add in file segments
         current_span = None
-        fout = streamfile.replace(' ', '\\040')
+        fout = escape(streamfile)
         for segment in stream[streamfile]:
             # Collapse adjacent segments
             streamoffset = blocks[segment.locator] + segment.segment_offset
index 627f0346db2c6760710db3edaf356f4cb724bf91..7ad07cc607206fe32f46fe0c94cf9ea34e115224 100644 (file)
@@ -26,7 +26,7 @@ from stat import *
 from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
 from .keep import KeepLocator, KeepClient
 from .stream import StreamReader
-from ._normalize_stream import normalize_stream
+from ._normalize_stream import normalize_stream, escape
 from ._ranges import Range, LocatorAndRange
 from .safeapi import ThreadSafeApiCache
 import arvados.config as config
@@ -562,6 +562,7 @@ class RichCollectionBase(CollectionBase):
     def stream_name(self):
         raise NotImplementedError()
 
+
     @synchronized
     def has_remote_blocks(self):
         """Recursively check for a +R segment locator signature."""
@@ -1058,7 +1059,9 @@ class RichCollectionBase(CollectionBase):
             if stream:
                 buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
             for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
-                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True, only_committed=only_committed))
+                buf.append(self[dirname].manifest_text(
+                    stream_name=os.path.join(stream_name, dirname),
+                    strip=strip, normalize=True, only_committed=only_committed))
             return "".join(buf)
         else:
             if strip:
@@ -1833,6 +1836,16 @@ class Subcollection(RichCollectionBase):
         self.name = newname
         self.lock = self.parent.root_collection().lock
 
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
+        """Encode empty directories by using an \056-named (".") empty file"""
+        if len(self._items) == 0:
+            return "%s %s 0:0:\\056\n" % (
+                escape(stream_name), config.EMPTY_BLOCK_LOCATOR)
+        return super(Subcollection, self)._get_manifest_text(stream_name,
+                                                             strip, normalize,
+                                                             only_committed)
+
 
 class CollectionReader(Collection):
     """A read-only collection object.
index de01006741e91b12047f70d6b82dfff04f80bfdc..66f062c167250b2873ea6a047554b5986f17ee46 100644 (file)
@@ -952,10 +952,49 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertIs(c.find("./nonexistant.txt"), None)
         self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
 
+    def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertEqual(c.portable_manifest_text(), manifest)
+
+    def test_other_special_chars_on_file_token(self):
+        cases = [
+            ('\\000', '\0'),
+            ('\\011', '\t'),
+            ('\\012', '\n'),
+            ('\\072', ':'),
+            ('\\134400', '\\400'),
+        ]
+        for encoded, decoded in cases:
+            manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
+            c = Collection(manifest)
+            self.assertEqual(c.portable_manifest_text(), manifest)
+            self.assertIn('some%sfile.txt' % decoded, c.keys())
+
+    def test_escaped_paths_do_get_unescaped_on_listing(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertIn('\\056 Test', c.keys())
+        self.assertIn('\\056', c['\\056 Test'].keys())
+
+    def test_make_empty_dir_with_escaped_chars(self):
+        c = Collection()
+        c.mkdirs('./Empty\\056Dir')
+        self.assertEqual(c.portable_manifest_text(),
+                         './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
+    def test_make_empty_dir_with_spaces(self):
+        c = Collection()
+        c.mkdirs('./foo bar/baz waz')
+        self.assertEqual(c.portable_manifest_text(),
+                         './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
index b2096b5ea0ebf05248fe45690c4f5b4c377a4be0..458af53a748834f6f0eb22942b07c16a6187e029 100644 (file)
@@ -101,8 +101,14 @@ module Keep
   end
 
   class Manifest
-    STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\s]+)*$/
-    STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
+    STREAM_TOKEN_REGEXP = /^([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
+    STREAM_NAME_REGEXP = /^(\.)(\/[^\/]+)*$/
+
+    EMPTY_DIR_TOKEN_REGEXP = /^0:0:\.$/ # The exception when a file can have '.' as a name
+    FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
+    FILE_NAME_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\/]+(\/[^\/]+)*)$/
+
+    NON_8BIT_ENCODED_CHAR = /[^\\]\\[4-7][0-7][0-7]/
 
     # Class to parse a manifest text and provide common views of that data.
     def initialize(manifest_text)
@@ -131,7 +137,9 @@ module Keep
       end
     end
 
-    def unescape(s)
+    def self.unescape(s)
+      return nil if s.nil?
+
       # Parse backslash escapes in a Keep manifest stream or file name.
       s.gsub(/\\(\\|[0-7]{3})/) do |_|
         case $1
@@ -143,6 +151,10 @@ module Keep
       end
     end
 
+    def unescape(s)
+      self.class.unescape(s)
+    end
+
     def split_file_token token
       start_pos, filesize, filename = token.split(':', 3)
       if filename.nil?
@@ -162,15 +174,15 @@ module Keep
           elsif in_file_tokens or not Locator.valid? token
             in_file_tokens = true
 
-            file_tokens = split_file_token(token)
+            start_pos, file_size, file_name = split_file_token(token)
             stream_name_adjuster = ''
-            if file_tokens[2].include?('/')                # '/' in filename
-              parts = file_tokens[2].rpartition('/')
-              stream_name_adjuster = parts[1] + parts[0]   # /dir_parts
-              file_tokens[2] = parts[2]
+            if file_name.include?('/')                # '/' in filename
+              dirname, sep, basename = file_name.rpartition('/')
+              stream_name_adjuster = sep + dirname   # /dir_parts
+              file_name = basename
             end
 
-            yield [stream_name + stream_name_adjuster] + file_tokens
+            yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
           end
         end
       end
@@ -197,10 +209,13 @@ module Keep
       # files.  This can help you avoid parsing the entire manifest if you
       # just want to check if a small number of files are specified.
       if stop_after.nil? or not @files.nil?
-        return files.size
+        # Avoid counting empty dir placeholders
+        return files.reject{|_, name, size| name == '.' and size == 0}.size
       end
       seen_files = {}
-      each_file_spec do |streamname, _, _, filename|
+      each_file_spec do |streamname, _, filesize, filename|
+        # Avoid counting empty dir placeholders
+        next if filename == "." and filesize == 0
         seen_files[[streamname, filename]] = true
         return stop_after if (seen_files.size >= stop_after)
       end
@@ -250,7 +265,9 @@ module Keep
         count = 0
 
         word = words.shift
-        count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on stream token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
+        unescaped_word = unescape(word)
+        count += 1 if word =~ STREAM_TOKEN_REGEXP and unescaped_word =~ STREAM_NAME_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/
         raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
 
         count = 0
@@ -262,7 +279,9 @@ module Keep
         raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
 
         count = 0
-        while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on file token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
+        while unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP or
+          (word =~ FILE_TOKEN_REGEXP and unescape(word) =~ FILE_NAME_REGEXP and ($~[1].split('/') & ['..', '.']).empty?)
           word = words.shift
           count += 1
         end
index 71a24a28c08dea61a186161d6168e908f7cd4404..eee8b39699078681d89ad331fe6c74cfe6f47079 100644 (file)
@@ -62,6 +62,11 @@ class ManifestTest < Minitest::Test
     assert_equal(0, Keep::Manifest.new("").files_count)
   end
 
+  def test_empty_dir_files_count
+    assert_equal(0,
+      Keep::Manifest.new("./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n").files_count)
+  end
+
   def test_empty_files_size
     assert_equal(0, Keep::Manifest.new("").files_size)
   end
@@ -320,6 +325,7 @@ class ManifestTest < Minitest::Test
     [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
     [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff\n"],
     [true, ". d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff 0:0:empty.txt\n"],
+    [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n"],
     [false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
       "Invalid manifest: does not end with newline"],
     [false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
@@ -334,8 +340,9 @@ class ManifestTest < Minitest::Test
       "invalid stream name \"./abc/..\""],
     [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
       "invalid stream name \"./abc/./foo\""],
-    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n",
-      "invalid file token \"0:0:.\""],
+    # non-empty '.'-named file tokens aren't acceptable. Empty ones are used as empty dir placeholders.
+    [false, ". 8cf8463b34caa8ac871a52d5dd7ad1ef+1 0:1:.\n",
+      "invalid file token \"0:1:.\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
       "invalid file token \"0:0:..\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
@@ -429,6 +436,62 @@ class ManifestTest < Minitest::Test
       "Manifest invalid for stream 1: invalid file token \"0:0:foo//bar.txt\""],
     [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
       "Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
+    # escaped chars
+    [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n"],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\""],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\\057foo\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\\\\057foo\""],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0\\0720\\072foo\n",
+      "Manifest invalid for stream 1: invalid file token \"0\\\\0720\\\\072foo\""],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 \\060:\\060:foo\n",
+      "Manifest invalid for stream 1: invalid file token \"\\\\060:\\\\060:foo\""],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\072\n"],
+    [true, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [true, "\\056\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [true, "./\\134444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, "./\\\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\\\\\444\""],
+    [true, "./\\011foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, "./\\011/.. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\011/..\""],
+    [false, ".\\056\\057 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\\\056\\\\057\""],
+    [false, ".\\057\\056 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057\\\\056\""],
+    [false, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""],
+    [false, "./\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./\\\\444\""],
+    [false, "./\tfoo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\\tfoo\""],
+    [false, "./foo\\ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\\""],
+    [false, "./foo\\r d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\r\""],
+    [false, "./foo\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./foo\\\\444\""],
+    [false, "./foo\\888 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\888\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\r\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\r\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\888\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\888\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057/bar\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\057/bar\""],
+    [false, ".\\057/Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057/Data\""],
+    [true, "./Data\\040Folder d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\057foo/bar\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\\\\057foo/bar\""],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134057foo/bar\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 \\040:\\040:foo.txt\n",
+      "Manifest invalid for stream 1: invalid file token \"\\\\040:\\\\040:foo.txt\""],
   ].each do |ok, manifest, expected_error=nil|
     define_method "test_validate manifest #{manifest.inspect}" do
       assert_equal ok, Keep::Manifest.valid?(manifest)
index 020dfa53b83a6ba645a79a1696b84968144cc0cd..1889d74eaa891dc980889cfd00491f54935ee08f 100644 (file)
@@ -95,7 +95,15 @@ class UserSessionsController < ApplicationController
 
     @redirect_to = root_path
     if params.has_key?(:return_to)
-      return send_api_token_to(params[:return_to], user)
+      # return_to param's format is 'remote,return_to_url'. This comes from login()
+      # encoding the remote=zbbbb parameter passed by a client asking for a salted
+      # token.
+      remote, return_to_url = params[:return_to].split(',', 2)
+      if remote !~ /^[0-9a-z]{5}$/ && remote != ""
+        return send_error 'Invalid remote cluster id', status: 400
+      end
+      remote = nil if remote == ''
+      return send_api_token_to(return_to_url, user, remote)
     end
     redirect_to @redirect_to
   end
@@ -119,8 +127,9 @@ class UserSessionsController < ApplicationController
   # to save the return_to parameter (if it exists; see the application
   # controller). /auth/joshid bypasses the application controller.
   def login
-    auth_provider = if params[:auth_provider] then "auth_provider=#{CGI.escape(params[:auth_provider])}" else "" end
-
+    if params[:remote] !~ /^[0-9a-z]{5}$/ && !params[:remote].nil?
+      return send_error 'Invalid remote cluster id', status: 400
+    end
     if current_user and params[:return_to]
       # Already logged in; just need to send a token to the requesting
       # API client.
@@ -128,15 +137,20 @@ class UserSessionsController < ApplicationController
       # FIXME: if current_user has never authorized this app before,
       # ask for confirmation here!
 
-      send_api_token_to(params[:return_to], current_user)
-    elsif params[:return_to]
-      redirect_to "/auth/joshid?return_to=#{CGI.escape(params[:return_to])}&#{auth_provider}"
-    else
-      redirect_to "/auth/joshid?#{auth_provider}"
+      return send_api_token_to(params[:return_to], current_user, params[:remote])
     end
+    p = []
+    p << "auth_provider=#{CGI.escape(params[:auth_provider])}" if params[:auth_provider]
+    if params[:return_to]
+      # Encode remote param inside callback's return_to, so that we'll get it on
+      # create() after login.
+      remote_param = params[:remote].nil? ? '' : params[:remote]
+      p << "return_to=#{CGI.escape(remote_param + ',' + params[:return_to])}"
+    end
+    redirect_to "/auth/joshid?#{p.join('&')}"
   end
 
-  def send_api_token_to(callback_url, user)
+  def send_api_token_to(callback_url, user, remote=nil)
     # Give the API client a token for making API calls on behalf of
     # the authenticated user
 
@@ -147,19 +161,24 @@ class UserSessionsController < ApplicationController
         find_or_create_by(url_prefix: api_client_url_prefix)
     end
 
-    api_client_auth = ApiClientAuthorization.
+    @api_client_auth = ApiClientAuthorization.
       new(user: user,
           api_client: @api_client,
           created_by_ip_address: remote_ip,
           scopes: ["all"])
-    api_client_auth.save!
+    @api_client_auth.save!
 
     if callback_url.index('?')
       callback_url += '&'
     else
       callback_url += '?'
     end
-    callback_url += 'api_token=' + api_client_auth.token
+    if remote.nil?
+      token = @api_client_auth.token
+    else
+      token = @api_client_auth.salted_token(remote: remote)
+    end
+    callback_url += 'api_token=' + token
     redirect_to callback_url
   end
 
index 53ae6af46426cadd55bf7ec4ae1cc94659ef1c0f..39253e1036ba9a52b2070f9e0a7d4043fecb2d43 100644 (file)
@@ -236,6 +236,13 @@ class ApiClientAuthorization < ArvadosModel
     'v2/' + uuid + '/' + api_token
   end
 
+  def salted_token(remote:)
+    if remote.nil?
+      token
+    end
+    'v2/' + uuid + '/' + OpenSSL::HMAC.hexdigest('sha1', api_token, remote)
+  end
+
   protected
 
   def permission_to_create
index f7021cf9def1487d834186339fc38eb8cea576e7..e3048159f4e7a99f025fca9153f2f09cf4d07863 100644 (file)
@@ -17,4 +17,22 @@ class UserSessionsControllerTest < ActionController::TestCase
     assert_not_nil assigns(:api_client)
   end
 
+  test "login with remote param returns a salted token" do
+    authorize_with :inactive
+    api_client_page = 'http://client.example.com/home'
+    remote_prefix = 'zbbbb'
+    get :login, return_to: api_client_page, remote: remote_prefix
+    assert_response :redirect
+    api_client_auth = assigns(:api_client_auth)
+    assert_not_nil api_client_auth
+    assert_includes(@response.redirect_url, 'api_token='+api_client_auth.salted_token(remote: remote_prefix))
+  end
+
+  test "login with malformed remote param returns an error" do
+    authorize_with :inactive
+    api_client_page = 'http://client.example.com/home'
+    remote_prefix = 'invalid_cluster_id'
+    get :login, return_to: api_client_page, remote: remote_prefix
+    assert_response 400
+  end
 end
index 0497c6a7d56294ae3d0841db5acd8ef9a441d809..f5085999ec5681d9da56f5244bd204e5473246dd 100644 (file)
@@ -5,11 +5,15 @@
 require 'test_helper'
 
 class UserSessionsApiTest < ActionDispatch::IntegrationTest
-  def client_url
-    'https://wb.example.com'
+  # remote prefix & return url packed into the return_to param passed around
+  # between API and SSO provider.
+  def client_url(remote: nil)
+    url = ',https://wb.example.com'
+    url = "#{remote}#{url}" unless remote.nil?
+    url
   end
 
-  def mock_auth_with(email: nil, username: nil, identity_url: nil)
+  def mock_auth_with(email: nil, username: nil, identity_url: nil, remote: nil, expected_response: :redirect)
     mock = {
       'provider' => 'josh_id',
       'uid' => 'https://edward.example.com',
@@ -24,9 +28,14 @@ class UserSessionsApiTest < ActionDispatch::IntegrationTest
     mock['info']['username'] = username unless username.nil?
     mock['info']['identity_url'] = identity_url unless identity_url.nil?
     post('/auth/josh_id/callback',
-         {return_to: client_url},
+         {return_to: client_url(remote: remote)},
          {'omniauth.auth' => mock})
-    assert_response :redirect, 'Did not redirect to client with token'
+
+    errors = {
+      :redirect => 'Did not redirect to client with token',
+      400 => 'Did not return Bad Request error',
+    }
+    assert_response expected_response, errors[expected_response]
   end
 
   test 'assign username from sso' do
@@ -61,14 +70,25 @@ class UserSessionsApiTest < ActionDispatch::IntegrationTest
 
   test 'create new user during omniauth callback' do
     mock_auth_with(email: 'edward@example.com')
-    assert_equal(0, @response.redirect_url.index(client_url),
+    assert_equal(0, @response.redirect_url.index(client_url.split(',', 2)[1]),
                  'Redirected to wrong address after succesful login: was ' +
-                 @response.redirect_url + ', expected ' + client_url + '[...]')
+                 @response.redirect_url + ', expected ' + client_url.split(',', 2)[1] + '[...]')
     assert_not_nil(@response.redirect_url.index('api_token='),
                    'Expected api_token in query string of redirect url ' +
                    @response.redirect_url)
   end
 
+  test 'issue salted token from omniauth callback with remote param' do
+    mock_auth_with(email: 'edward@example.com', remote: 'zbbbb')
+    api_client_auth = assigns(:api_client_auth)
+    assert_not_nil api_client_auth
+    assert_includes(@response.redirect_url, 'api_token=' + api_client_auth.salted_token(remote: 'zbbbb'))
+  end
+
+  test 'error out from omniauth callback with invalid remote param' do
+    mock_auth_with(email: 'edward@example.com', remote: 'invalid_cluster_id', expected_response: 400)
+  end
+
   # Test various combinations of auto_setup configuration and email
   # address provided during a new user's first session setup.
   [{result: :nope, email: nil, cfg: {auto: true, repo: true, vm: true}},
index fb282d1aaa76a91cd58b9a5a15a28e7b263a1c4c..bed81ad7273d3fb4f16c295447dba9c0bb5d9dce 100644 (file)
@@ -158,7 +158,7 @@ class FuseMagicTest(MountTestBase):
         self.assertIn(self.testcollection,
                       llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
         self.assertIn(self.test_project, mount_ls)
-        self.assertIn(self.test_project, 
+        self.assertIn(self.test_project,
                       llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
 
         with self.assertRaises(OSError):
@@ -615,9 +615,10 @@ class FuseRmTest(MountTestBase):
             r'\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
         self.pool.apply(fuseRmTestHelperDeleteFile, (self.mounttmp,))
 
-        # Can't have empty directories :-( so manifest will be empty.
+        # Empty directories are represented by an empty file named "."
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
-        self.assertEqual(collection2["manifest_text"], "")
+        self.assertRegexpMatches(collection2["manifest_text"],
+                                 r'./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
 
         self.pool.apply(fuseRmTestHelperRmdir, (self.mounttmp,))
 
@@ -674,7 +675,7 @@ class FuseMvFileTest(MountTestBase):
 
         collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
         self.assertRegexpMatches(collection2["manifest_text"],
-            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt\n\./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
 
 
 def fuseRenameTestHelper(mounttmp):