14325: Merge branch 'master'

author Tom Clegg <tclegg@veritasgenetics.com>

Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)

committer Tom Clegg <tclegg@veritasgenetics.com>

Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
author Tom Clegg <tclegg@veritasgenetics.com>
Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
committer Tom Clegg <tclegg@veritasgenetics.com>
Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
diff --git a/apps/workbench/app/assets/javascripts/components/edit_tags.js b/apps/workbench/app/assets/javascripts/components/edit_tags.js

index ac4d2df7b235f57851c80dae768d1da7fda3182f..1fddb2651ef96a2cbec2e5dff1da030a0f33c3eb 100644 (file)
--- a/apps/workbench/app/assets/javascripts/components/edit_tags.js
+++ b/apps/workbench/app/assets/javascripts/components/edit_tags.js
@@ -4,7 +4,7 @@
  
  window.SimpleInput = {
      view: function(vnode) {
-        return m("input.form-control", {
+        return m('input.form-control', {
              style: {
                  width: '100%',
              },
@@ -22,7 +22,7 @@ window.SimpleInput = {
  
  window.SelectOrAutocomplete = {
      view: function(vnode) {
-        return m("input.form-control", {
+        return m('input.form-control', {
              style: {
                  width: '100%'
              },
@@ -87,9 +87,9 @@ window.TagEditorRow = {
                      valueOpts = vnode.attrs.vocabulary().tags[vnode.attrs.name()].values
              }
          }
-        return m("tr", [
+        return m('tr', [
              // Erase tag
-            m("td", [
+            m('td', [
                  vnode.attrs.editMode &&
                  m('div.text-center', m('a.btn.btn-default.btn-sm', {
                      style: {
@@ -99,13 +99,13 @@ window.TagEditorRow = {
                  }, m('i.fa.fa-fw.fa-trash-o')))
              ]),
              // Tag key
-            m("td", [
+            m('td', [
                  vnode.attrs.editMode ?
-                m("div", {key: 'key'}, [
+                m('div', {key: 'key'}, [
                      m(inputComponent, {
                          options: nameOpts,
                          value: vnode.attrs.name,
-                        // Allow any tag name unless "strict" is set to true.
+                        // Allow any tag name unless 'strict' is set to true.
                          create: !vnode.attrs.vocabulary().strict,
                          placeholder: 'key',
                      })
@@ -113,9 +113,9 @@ window.TagEditorRow = {
                  : vnode.attrs.name
              ]),
              // Tag value
-            m("td", [
+            m('td', [
                  vnode.attrs.editMode ?
-                m("div", {key: 'value'}, [
+                m('div', {key: 'value'}, [
                      m(inputComponent, {
                          options: valueOpts,
                          value: vnode.attrs.value,
@@ -137,20 +137,20 @@ window.TagEditorRow = {
  
  window.TagEditorTable = {
      view: function(vnode) {
-        return m("table.table.table-condensed.table-justforlayout", [
-            m("colgroup", [
-                m("col", {width:"5%"}),
-                m("col", {width:"25%"}),
-                m("col", {width:"70%"}),
+        return m('table.table.table-condensed.table-justforlayout', [
+            m('colgroup', [
+                m('col', {width:'5%'}),
+                m('col', {width:'25%'}),
+                m('col', {width:'70%'}),
              ]),
-            m("thead", [
-                m("tr", [
-                    m("th"),
-                    m("th", "Key"),
-                    m("th", "Value"),
+            m('thead', [
+                m('tr', [
+                    m('th'),
+                    m('th', 'Key'),
+                    m('th', 'Value'),
                  ])
              ]),
-            m("tbody", [
+            m('tbody', [
                  vnode.attrs.tags.length > 0
                  ? vnode.attrs.tags.map(function(tag, idx) {
                      return m(TagEditorRow, {
@@ -165,7 +165,7 @@ window.TagEditorTable = {
                          vocabulary: vnode.attrs.vocabulary
                      })
                  })
-                : m("tr", m("td[colspan=3]", m("center", "Loading tags...")))
+                : m('tr', m('td[colspan=3]', m('center', 'Loading tags...')))
              ]),
          ])
      }
@@ -185,18 +185,18 @@ window.TagEditorApp = {
      oninit: function(vnode) {
          vnode.state.sessionDB = new SessionDB()
          // Get vocabulary
-        vnode.state.vocabulary = m.stream({"strict":false, "tags":{}})
+        vnode.state.vocabulary = m.stream({'strict':false, 'tags':{}})
          var vocabularyTimestamp = parseInt(Date.now() / 300000) // Bust cache every 5 minutes
          m.request('/vocabulary.json?v=' + vocabularyTimestamp).then(vnode.state.vocabulary)
          vnode.state.editMode = vnode.attrs.targetEditable
          vnode.state.tags = []
          vnode.state.dirty = m.stream(false)
          vnode.state.dirty.map(m.redraw)
-        vnode.state.objPath = '/arvados/v1/'+vnode.attrs.targetController+'/'+vnode.attrs.targetUuid
+        vnode.state.objPath = 'arvados/v1/' + vnode.attrs.targetController + '/' + vnode.attrs.targetUuid
          // Get tags
          vnode.state.sessionDB.request(
              vnode.state.sessionDB.loadLocal(),
-            '/arvados/v1/'+vnode.attrs.targetController,
+            'arvados/v1/' + vnode.attrs.targetController,
              {
                  data: {
                      filters: JSON.stringify([['uuid', '=', vnode.attrs.targetUuid]]),
@@ -228,8 +228,8 @@ window.TagEditorApp = {
      view: function(vnode) {
          return [
              vnode.state.editMode &&
-            m("div.pull-left", [
-                m("a.btn.btn-primary.btn-sm"+(vnode.state.dirty() ? '' : '.disabled'), {
+            m('div.pull-left', [
+                m('a.btn.btn-primary.btn-sm' + (vnode.state.dirty() ? '' : '.disabled'), {
                      style: {
                          margin: '10px 0px'
                      },
@@ -244,7 +244,7 @@ window.TagEditorApp = {
                          vnode.state.sessionDB.request(
                              vnode.state.sessionDB.loadLocal(),
                              vnode.state.objPath, {
-                                method: "PUT",
+                                method: 'PUT',
                                  data: {properties: JSON.stringify(tags)}
                              }
                          ).then(function(v) {
diff --git a/apps/workbench/app/controllers/work_units_controller.rb b/apps/workbench/app/controllers/work_units_controller.rb

index 767762c81e3cd3d899bda0b3bce873cc97c390b9..d3ded867c198f5c265fafb7b49a89d50e1515fc9 100644 (file)
--- a/apps/workbench/app/controllers/work_units_controller.rb
+++ b/apps/workbench/app/controllers/work_units_controller.rb
@@ -126,7 +126,7 @@ class WorkUnitsController < ApplicationController
                            "--local",
                            "--api=containers",
                            "--project-uuid=#{params['work_unit']['owner_uuid']}",
-                          "--collection-keep-cache=#{keep_cache}",
+                          "--collection-cache-size=#{keep_cache}",
                            "/var/lib/cwl/workflow.json#main",
                            "/var/lib/cwl/cwl.input.json"]
  
diff --git a/apps/workbench/app/models/collection.rb b/apps/workbench/app/models/collection.rb

index 09af60fb9cc5f18a9102d02ed8133e6651f5ecc9..f5aef841ea47a6bfc66e84bccbaa86c617267a3f 100644 (file)
--- a/apps/workbench/app/models/collection.rb
+++ b/apps/workbench/app/models/collection.rb
@@ -68,8 +68,8 @@ class Collection < ArvadosBase
          .sort.flat_map do |parts|
          [parts + [nil]] + dir_to_tree.call(File.join(parts))
        end
-      # Then extend that list with files in this directory.
-      subnodes + tree[File.split(dirname)]
+      # Then extend that list with files in this directory, except the empty dir placeholders (0:0:. files).
+      subnodes + tree[File.split(dirname)].reject { |_, basename, size| (basename == '.') and (size == 0) }
      end
      dir_to_tree.call('.')
    end
diff --git a/build/run-tests.sh b/build/run-tests.sh

index 0a26e851365237234262bb44c549edf5116735c7..749075d81576242a29db0d1b075414dc5f1d0270 100755 (executable)
--- a/build/run-tests.sh
+++ b/build/run-tests.sh
@@ -37,7 +37,7 @@ CONFIGSRC=path Dir with api server config files to copy into source tree.
                 (If none given, leave config files alone in source tree.)
  services/api_test="TEST=test/functional/arvados/v1/collections_controller_test.rb"
                 Restrict apiserver tests to the given file
-sdk/python_test="--test-suite test.test_keep_locator"
+sdk/python_test="--test-suite tests.test_keep_locator"
                 Restrict Python SDK tests to the given class
  apps/workbench_test="TEST=test/integration/pipeline_instances_test.rb"
                 Restrict Workbench tests to the given file
diff --git a/lib/controller/handler_test.go b/lib/controller/handler_test.go

index 746b9242f2198ee3c3000c808771047d4aa1c77c..f11228a31350b93f2da70a7b5ab46b8926a47b06 100644 (file)
--- a/lib/controller/handler_test.go
+++ b/lib/controller/handler_test.go
@@ -128,7 +128,7 @@ func (s *HandlerSuite) TestProxyRedirect(c *check.C) {
         resp := httptest.NewRecorder()
         s.handler.ServeHTTP(resp, req)
         c.Check(resp.Code, check.Equals, http.StatusFound)
-       c.Check(resp.Header().Get("Location"), check.Matches, `https://0.0.0.0:1/auth/joshid\?return_to=foo&?`)
+       c.Check(resp.Header().Get("Location"), check.Matches, `https://0.0.0.0:1/auth/joshid\?return_to=%2Cfoo&?`)
  }
  
  func (s *HandlerSuite) TestValidateV1APIToken(c *check.C) {
diff --git a/sdk/python/arvados/_normalize_stream.py b/sdk/python/arvados/_normalize_stream.py

index 47b66c82da000d840bdb7221575019cf6396981e..b579d41ed2839b73fa72c6486101abe97e46a0cc 100644 (file)
--- a/sdk/python/arvados/_normalize_stream.py
+++ b/sdk/python/arvados/_normalize_stream.py
@@ -5,6 +5,13 @@
  from __future__ import absolute_import
  from . import config
  
+import re
+
+def escape(path):  # Return path with backslash, colon and chars \000-\040 replaced by \ooo octal escapes.
+    path = re.sub('\\\\', lambda m: '\\134', path)  # backslashes first, so the escapes added below are not escaped again
+    path = re.sub('[:\000-\040]', lambda m: "\\%03o" % ord(m.group(0)), path)  # colon plus every char from NUL through space
+    return path
+
  def normalize_stream(stream_name, stream):
      """Take manifest stream and return a list of tokens in normalized format.
  
@@ -16,7 +23,7 @@ def normalize_stream(stream_name, stream):
  
      """
  
-    stream_name = stream_name.replace(' ', '\\040')
+    stream_name = escape(stream_name)
      stream_tokens = [stream_name]
      sortedfiles = list(stream.keys())
      sortedfiles.sort()
@@ -38,7 +45,7 @@ def normalize_stream(stream_name, stream):
      for streamfile in sortedfiles:
          # Add in file segments
          current_span = None
-        fout = streamfile.replace(' ', '\\040')
+        fout = escape(streamfile)
          for segment in stream[streamfile]:
              # Collapse adjacent segments
              streamoffset = blocks[segment.locator] + segment.segment_offset
diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py

index 627f0346db2c6760710db3edaf356f4cb724bf91..7ad07cc607206fe32f46fe0c94cf9ea34e115224 100644 (file)
--- a/sdk/python/arvados/collection.py
+++ b/sdk/python/arvados/collection.py
@@ -26,7 +26,7 @@ from stat import *
  from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
  from .keep import KeepLocator, KeepClient
  from .stream import StreamReader
-from ._normalize_stream import normalize_stream
+from ._normalize_stream import normalize_stream, escape
  from ._ranges import Range, LocatorAndRange
  from .safeapi import ThreadSafeApiCache
  import arvados.config as config
@@ -562,6 +562,7 @@ class RichCollectionBase(CollectionBase):
      def stream_name(self):
          raise NotImplementedError()
  
+
      @synchronized
      def has_remote_blocks(self):
          """Recursively check for a +R segment locator signature."""
@@ -1058,7 +1059,9 @@ class RichCollectionBase(CollectionBase):
              if stream:
                  buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
              for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
-                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True, only_committed=only_committed))
+                buf.append(self[dirname].manifest_text(
+                    stream_name=os.path.join(stream_name, dirname),
+                    strip=strip, normalize=True, only_committed=only_committed))
              return "".join(buf)
          else:
              if strip:
@@ -1833,6 +1836,16 @@ class Subcollection(RichCollectionBase):
          self.name = newname
          self.lock = self.parent.root_collection().lock
  
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize, only_committed=False):
+        """Encode empty directories by using an \\056-named (".") empty file"""
+        if len(self._items) == 0:  # no entries: emit a single placeholder stream line
+            return "%s %s 0:0:\\056\n" % (
+                escape(stream_name), config.EMPTY_BLOCK_LOCATOR)
+        return super(Subcollection, self)._get_manifest_text(stream_name,
+                                                             strip, normalize,
+                                                             only_committed)
+
  
  class CollectionReader(Collection):
      """A read-only collection object.
diff --git a/sdk/python/tests/test_collections.py b/sdk/python/tests/test_collections.py

index de01006741e91b12047f70d6b82dfff04f80bfdc..66f062c167250b2873ea6a047554b5986f17ee46 100644 (file)
--- a/sdk/python/tests/test_collections.py
+++ b/sdk/python/tests/test_collections.py
@@ -952,10 +952,49 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
          self.assertIs(c.find("./nonexistant.txt"), None)
          self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
  
+    def test_escaped_paths_dont_get_unescaped_on_manifest(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertEqual(c.portable_manifest_text(), manifest)
+
+    def test_other_special_chars_on_file_token(self):
+        cases = [  # (escaped form in the manifest, expected decoded form in the listing)
+            ('\\000', '\0'),
+            ('\\011', '\t'),
+            ('\\012', '\n'),
+            ('\\072', ':'),
+            ('\\134400', '\\400'),  # escaped backslash followed by the literal digits 400
+        ]
+        for encoded, decoded in cases:
+            manifest = '. d41d8cd98f00b204e9800998ecf8427e+0 0:0:some%sfile.txt\n' % encoded
+            c = Collection(manifest)
+            self.assertEqual(c.portable_manifest_text(), manifest)  # manifest text round-trips unchanged
+            self.assertIn('some%sfile.txt' % decoded, c.keys())  # listing exposes the decoded name
+
+    def test_escaped_paths_do_get_unescaped_on_listing(self):
+        # Dir & file names are literally '\056' (escaped form: \134056)
+        manifest = './\\134056\\040Test d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134056\n'
+        c = Collection(manifest)
+        self.assertIn('\\056 Test', c.keys())
+        self.assertIn('\\056', c['\\056 Test'].keys())
+
+    def test_make_empty_dir_with_escaped_chars(self):
+        c = Collection()
+        c.mkdirs('./Empty\\056Dir')  # directory name contains a literal backslash followed by "056"
+        self.assertEqual(c.portable_manifest_text(),  # backslash is written as \134; the dir gets a 0:0:\056 placeholder
+                         './Empty\\134056Dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
+    def test_make_empty_dir_with_spaces(self):
+        c = Collection()
+        c.mkdirs('./foo bar/baz waz')  # both path components contain a space
+        self.assertEqual(c.portable_manifest_text(),  # spaces are written as \040 in the stream name
+                         './foo\\040bar/baz\\040waz d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n')
+
      def test_remove_in_subdir(self):
          c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
          c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n", c.portable_manifest_text())
  
      def test_remove_empty_subdir(self):
          c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
diff --git a/sdk/ruby/lib/arvados/keep.rb b/sdk/ruby/lib/arvados/keep.rb

index b2096b5ea0ebf05248fe45690c4f5b4c377a4be0..458af53a748834f6f0eb22942b07c16a6187e029 100644 (file)
--- a/sdk/ruby/lib/arvados/keep.rb
+++ b/sdk/ruby/lib/arvados/keep.rb
@@ -101,8 +101,14 @@ module Keep
    end
  
    class Manifest
-    STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\s]+)*$/
-    STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
+    STREAM_TOKEN_REGEXP = /^([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
+    STREAM_NAME_REGEXP = /^(\.)(\/[^\/]+)*$/
+
+    EMPTY_DIR_TOKEN_REGEXP = /^0:0:\.$/ # The exception when a file can have '.' as a name
+    FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\000-\040\\]|\\[0-3][0-7][0-7])+$/
+    FILE_NAME_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\/]+(\/[^\/]+)*)$/
+
+    NON_8BIT_ENCODED_CHAR = /[^\\]\\[4-7][0-7][0-7]/
  
      # Class to parse a manifest text and provide common views of that data.
      def initialize(manifest_text)
@@ -131,7 +137,9 @@ module Keep
        end
      end
  
-    def unescape(s)
+    def self.unescape(s)
+      return nil if s.nil?
+
        # Parse backslash escapes in a Keep manifest stream or file name.
        s.gsub(/\\(\\|[0-7]{3})/) do |_|
          case $1
@@ -143,6 +151,10 @@ module Keep
        end
      end
  
+    def unescape(s)  # instance-level wrapper so existing instance callers keep working
+      self.class.unescape(s)  # delegates to the class-level unescape
+    end
+
      def split_file_token token
        start_pos, filesize, filename = token.split(':', 3)
        if filename.nil?
@@ -162,15 +174,15 @@ module Keep
            elsif in_file_tokens or not Locator.valid? token
              in_file_tokens = true
  
-            file_tokens = split_file_token(token)
+            start_pos, file_size, file_name = split_file_token(token)
              stream_name_adjuster = ''
-            if file_tokens[2].include?('/')                # '/' in filename
-              parts = file_tokens[2].rpartition('/')
-              stream_name_adjuster = parts[1] + parts[0]   # /dir_parts
-              file_tokens[2] = parts[2]
+            if file_name.include?('/')                # '/' in filename
+              dirname, sep, basename = file_name.rpartition('/')
+              stream_name_adjuster = sep + dirname   # /dir_parts
+              file_name = basename
              end
  
-            yield [stream_name + stream_name_adjuster] + file_tokens
+            yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
            end
          end
        end
@@ -197,10 +209,13 @@ module Keep
        # files.  This can help you avoid parsing the entire manifest if you
        # just want to check if a small number of files are specified.
        if stop_after.nil? or not @files.nil?
-        return files.size
+        # Avoid counting empty dir placeholders
+        return files.reject{|_, name, size| name == '.' and size == 0}.size
        end
        seen_files = {}
-      each_file_spec do |streamname, _, _, filename|
+      each_file_spec do |streamname, _, filesize, filename|
+        # Avoid counting empty dir placeholders
+        next if filename == "." and filesize == 0
          seen_files[[streamname, filename]] = true
          return stop_after if (seen_files.size >= stop_after)
        end
@@ -250,7 +265,9 @@ module Keep
          count = 0
  
          word = words.shift
-        count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on stream token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
+        unescaped_word = unescape(word)
+        count += 1 if word =~ STREAM_TOKEN_REGEXP and unescaped_word =~ STREAM_NAME_REGEXP and unescaped_word !~ /\/\.\.?(\/|$)/
          raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
  
          count = 0
@@ -262,7 +279,9 @@ module Keep
          raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
  
          count = 0
-        while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: >8-bit encoded chars not allowed on file token #{word.inspect}" if word =~ NON_8BIT_ENCODED_CHAR
+        while unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP or
+          (word =~ FILE_TOKEN_REGEXP and unescape(word) =~ FILE_NAME_REGEXP and ($~[1].split('/') & ['..', '.']).empty?)
            word = words.shift
            count += 1
          end
diff --git a/sdk/ruby/test/test_keep_manifest.rb b/sdk/ruby/test/test_keep_manifest.rb

index 71a24a28c08dea61a186161d6168e908f7cd4404..eee8b39699078681d89ad331fe6c74cfe6f47079 100644 (file)
--- a/sdk/ruby/test/test_keep_manifest.rb
+++ b/sdk/ruby/test/test_keep_manifest.rb
@@ -62,6 +62,11 @@ class ManifestTest < Minitest::Test
      assert_equal(0, Keep::Manifest.new("").files_count)
    end
  
+  def test_empty_dir_files_count
+    assert_equal(0,  # the 0:0:\056 entry must not be counted as a file
+      Keep::Manifest.new("./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n").files_count)
+  end
+
    def test_empty_files_size
      assert_equal(0, Keep::Manifest.new("").files_size)
    end
@@ -320,6 +325,7 @@ class ManifestTest < Minitest::Test
      [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
      [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff\n"],
      [true, ". d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff 0:0:empty.txt\n"],
+    [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n"],
      [false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
        "Invalid manifest: does not end with newline"],
      [false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
@@ -334,8 +340,9 @@ class ManifestTest < Minitest::Test
        "invalid stream name \"./abc/..\""],
      [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
        "invalid stream name \"./abc/./foo\""],
-    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n",
-      "invalid file token \"0:0:.\""],
+    # non-empty '.'-named file tokens aren't acceptable. Empty ones are used as empty dir placeholders.
+    [false, ". 8cf8463b34caa8ac871a52d5dd7ad1ef+1 0:1:.\n",
+      "invalid file token \"0:1:.\""],
      [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
        "invalid file token \"0:0:..\""],
      [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
@@ -429,6 +436,62 @@ class ManifestTest < Minitest::Test
        "Manifest invalid for stream 1: invalid file token \"0:0:foo//bar.txt\""],
      [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
        "Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
+    # escaped chars
+    [true, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\n"],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\""],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\056\\056\\057foo\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\\\\056\\\\056\\\\057foo\""],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 0\\0720\\072foo\n",
+      "Manifest invalid for stream 1: invalid file token \"0\\\\0720\\\\072foo\""],
+    [false, "./empty_dir d41d8cd98f00b204e9800998ecf8427e+0 \\060:\\060:foo\n",
+      "Manifest invalid for stream 1: invalid file token \"\\\\060:\\\\060:foo\""],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\072\n"],
+    [true, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [true, "\\056\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [true, "./\\134444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, "./\\\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\\\\\444\""],
+    [true, "./\\011foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, "./\\011/.. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\\\\011/..\""],
+    [false, ".\\056\\057 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\\\056\\\\057\""],
+    [false, ".\\057\\056 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057\\\\056\""],
+    [false, ".\\057Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""],
+    [false, "./\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./\\\\444\""],
+    [false, "./\tfoo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\\tfoo\""],
+    [false, "./foo\\ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\\""],
+    [false, "./foo\\r d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\r\""],
+    [false, "./foo\\444 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on stream token \"./foo\\\\444\""],
+    [false, "./foo\\888 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\\\888\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\r\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\r\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\444\n",
+      "Manifest invalid for stream 1: >8-bit encoded chars not allowed on file token \"0:0:foo\\\\444\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\888\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\888\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\\057/bar\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo\\\\057/bar\""],
+    [false, ".\\057/Data d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\\\057/Data\""],
+    [true, "./Data\\040Folder d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\057foo/bar\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\\\\057foo/bar\""],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\134057foo/bar\n"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 \\040:\\040:foo.txt\n",
+      "Manifest invalid for stream 1: invalid file token \"\\\\040:\\\\040:foo.txt\""],
    ].each do |ok, manifest, expected_error=nil|
      define_method "test_validate manifest #{manifest.inspect}" do
        assert_equal ok, Keep::Manifest.valid?(manifest)
diff --git a/services/api/app/controllers/user_sessions_controller.rb b/services/api/app/controllers/user_sessions_controller.rb

index 020dfa53b83a6ba645a79a1696b84968144cc0cd..1889d74eaa891dc980889cfd00491f54935ee08f 100644 (file)
--- a/services/api/app/controllers/user_sessions_controller.rb
+++ b/services/api/app/controllers/user_sessions_controller.rb
@@ -95,7 +95,15 @@ class UserSessionsController < ApplicationController
  
      @redirect_to = root_path
      if params.has_key?(:return_to)
-      return send_api_token_to(params[:return_to], user)
+      # return_to param's format is 'remote,return_to_url'. This comes from login()
+      # encoding the remote=zbbbb parameter passed by a client asking for a salted
+      # token.
+      remote, return_to_url = params[:return_to].split(',', 2)
+      if remote !~ /^[0-9a-z]{5}$/ && remote != ""
+        return send_error 'Invalid remote cluster id', status: 400
+      end
+      remote = nil if remote == ''
+      return send_api_token_to(return_to_url, user, remote)
      end
      redirect_to @redirect_to
    end
@@ -119,8 +127,9 @@ class UserSessionsController < ApplicationController
    # to save the return_to parameter (if it exists; see the application
    # controller). /auth/joshid bypasses the application controller.
    def login
-    auth_provider = if params[:auth_provider] then "auth_provider=#{CGI.escape(params[:auth_provider])}" else "" end
-
+    if params[:remote] !~ /^[0-9a-z]{5}$/ && !params[:remote].nil?
+      return send_error 'Invalid remote cluster id', status: 400
+    end
      if current_user and params[:return_to]
        # Already logged in; just need to send a token to the requesting
        # API client.
@@ -128,15 +137,20 @@ class UserSessionsController < ApplicationController
        # FIXME: if current_user has never authorized this app before,
        # ask for confirmation here!
  
-      send_api_token_to(params[:return_to], current_user)
-    elsif params[:return_to]
-      redirect_to "/auth/joshid?return_to=#{CGI.escape(params[:return_to])}&#{auth_provider}"
-    else
-      redirect_to "/auth/joshid?#{auth_provider}"
+      return send_api_token_to(params[:return_to], current_user, params[:remote])
      end
+    p = []
+    p << "auth_provider=#{CGI.escape(params[:auth_provider])}" if params[:auth_provider]
+    if params[:return_to]
+      # Encode remote param inside callback's return_to, so that we'll get it on
+      # create() after login.
+      remote_param = params[:remote].nil? ? '' : params[:remote]
+      p << "return_to=#{CGI.escape(remote_param + ',' + params[:return_to])}"
+    end
+    redirect_to "/auth/joshid?#{p.join('&')}"
    end
  
-  def send_api_token_to(callback_url, user)
+  def send_api_token_to(callback_url, user, remote=nil)
      # Give the API client a token for making API calls on behalf of
      # the authenticated user
  
@@ -147,19 +161,24 @@ class UserSessionsController < ApplicationController
          find_or_create_by(url_prefix: api_client_url_prefix)
      end
  
-    api_client_auth = ApiClientAuthorization.
+    @api_client_auth = ApiClientAuthorization.
        new(user: user,
            api_client: @api_client,
            created_by_ip_address: remote_ip,
            scopes: ["all"])
-    api_client_auth.save!
+    @api_client_auth.save!
  
      if callback_url.index('?')
        callback_url += '&'
      else
        callback_url += '?'
      end
-    callback_url += 'api_token=' + api_client_auth.token
+    if remote.nil?
+      token = @api_client_auth.token
+    else
+      token = @api_client_auth.salted_token(remote: remote)
+    end
+    callback_url += 'api_token=' + token
      redirect_to callback_url
    end
  
diff --git a/services/api/app/models/api_client_authorization.rb b/services/api/app/models/api_client_authorization.rb

index 53ae6af46426cadd55bf7ec4ae1cc94659ef1c0f..39253e1036ba9a52b2070f9e0a7d4043fecb2d43 100644 (file)
--- a/services/api/app/models/api_client_authorization.rb
+++ b/services/api/app/models/api_client_authorization.rb
@@ -236,6 +236,13 @@ class ApiClientAuthorization < ArvadosModel
      'v2/' + uuid + '/' + api_token
    end
  
+  # Return this token salted for the cluster ID +remote+ (v2 format with an
+  # HMAC-SHA1 of the secret); return the plain token when +remote+ is nil.
+  def salted_token(remote:)
+    return token if remote.nil?
+    'v2/' + uuid + '/' + OpenSSL::HMAC.hexdigest('sha1', api_token, remote)
+  end
+
    protected
  
    def permission_to_create
diff --git a/services/api/test/functional/user_sessions_controller_test.rb b/services/api/test/functional/user_sessions_controller_test.rb

index f7021cf9def1487d834186339fc38eb8cea576e7..e3048159f4e7a99f025fca9153f2f09cf4d07863 100644 (file)
--- a/services/api/test/functional/user_sessions_controller_test.rb
+++ b/services/api/test/functional/user_sessions_controller_test.rb
@@ -17,4 +17,22 @@ class UserSessionsControllerTest < ActionController::TestCase
      assert_not_nil assigns(:api_client)
    end
  
+  test "login with remote param returns a salted token" do
+    authorize_with :inactive
+    api_client_page = 'http://client.example.com/home'
+    remote_prefix = 'zbbbb'
+    get :login, return_to: api_client_page, remote: remote_prefix
+    assert_response :redirect
+    api_client_auth = assigns(:api_client_auth)
+    assert_not_nil api_client_auth
+    assert_includes(@response.redirect_url, 'api_token='+api_client_auth.salted_token(remote: remote_prefix))
+  end
+
+  test "login with malformed remote param returns an error" do
+    authorize_with :inactive
+    api_client_page = 'http://client.example.com/home'
+    remote_prefix = 'invalid_cluster_id'
+    get :login, return_to: api_client_page, remote: remote_prefix
+    assert_response 400
+  end
  end
diff --git a/services/api/test/integration/user_sessions_test.rb b/services/api/test/integration/user_sessions_test.rb

index 0497c6a7d56294ae3d0841db5acd8ef9a441d809..f5085999ec5681d9da56f5244bd204e5473246dd 100644 (file)
--- a/services/api/test/integration/user_sessions_test.rb
+++ b/services/api/test/integration/user_sessions_test.rb
@@ -5,11 +5,15 @@
  require 'test_helper'
  
  class UserSessionsApiTest < ActionDispatch::IntegrationTest
-  def client_url
-    'https://wb.example.com'
+  # remote prefix & return url packed into the return_to param passed around
+  # between API and SSO provider.
+  def client_url(remote: nil)
+    url = ',https://wb.example.com'
+    url = "#{remote}#{url}" unless remote.nil?
+    url
    end
  
-  def mock_auth_with(email: nil, username: nil, identity_url: nil)
+  def mock_auth_with(email: nil, username: nil, identity_url: nil, remote: nil, expected_response: :redirect)
      mock = {
        'provider' => 'josh_id',
        'uid' => 'https://edward.example.com',
@@ -24,9 +28,14 @@ class UserSessionsApiTest < ActionDispatch::IntegrationTest
      mock['info']['username'] = username unless username.nil?
      mock['info']['identity_url'] = identity_url unless identity_url.nil?
      post('/auth/josh_id/callback',
-         {return_to: client_url},
+         {return_to: client_url(remote: remote)},
           {'omniauth.auth' => mock})
-    assert_response :redirect, 'Did not redirect to client with token'
+
+    errors = {
+      :redirect => 'Did not redirect to client with token',
+      400 => 'Did not return Bad Request error',
+    }
+    assert_response expected_response, errors[expected_response]
    end
  
    test 'assign username from sso' do
@@ -61,14 +70,25 @@ class UserSessionsApiTest < ActionDispatch::IntegrationTest
  
    test 'create new user during omniauth callback' do
      mock_auth_with(email: 'edward@example.com')
-    assert_equal(0, @response.redirect_url.index(client_url),
+    assert_equal(0, @response.redirect_url.index(client_url.split(',', 2)[1]),
                   'Redirected to wrong address after succesful login: was ' +
-                 @response.redirect_url + ', expected ' + client_url + '[...]')
+                 @response.redirect_url + ', expected ' + client_url.split(',', 2)[1] + '[...]')
      assert_not_nil(@response.redirect_url.index('api_token='),
                     'Expected api_token in query string of redirect url ' +
                     @response.redirect_url)
    end
  
+  test 'issue salted token from omniauth callback with remote param' do
+    mock_auth_with(email: 'edward@example.com', remote: 'zbbbb')
+    api_client_auth = assigns(:api_client_auth)
+    assert_not_nil api_client_auth
+    assert_includes(@response.redirect_url, 'api_token=' + api_client_auth.salted_token(remote: 'zbbbb'))
+  end
+
+  test 'error out from omniauth callback with invalid remote param' do
+    mock_auth_with(email: 'edward@example.com', remote: 'invalid_cluster_id', expected_response: 400)
+  end
+
    # Test various combinations of auto_setup configuration and email
    # address provided during a new user's first session setup.
    [{result: :nope, email: nil, cfg: {auto: true, repo: true, vm: true}},
diff --git a/services/fuse/tests/test_mount.py b/services/fuse/tests/test_mount.py

index fb282d1aaa76a91cd58b9a5a15a28e7b263a1c4c..bed81ad7273d3fb4f16c295447dba9c0bb5d9dce 100644 (file)
--- a/services/fuse/tests/test_mount.py
+++ b/services/fuse/tests/test_mount.py
@@ -158,7 +158,7 @@ class FuseMagicTest(MountTestBase):
          self.assertIn(self.testcollection,
                        llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
          self.assertIn(self.test_project, mount_ls)
-        self.assertIn(self.test_project, 
+        self.assertIn(self.test_project,
                        llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
  
          with self.assertRaises(OSError):
@@ -615,9 +615,10 @@ class FuseRmTest(MountTestBase):
              r'\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
          self.pool.apply(fuseRmTestHelperDeleteFile, (self.mounttmp,))
  
-        # Can't have empty directories :-( so manifest will be empty.
+        # Empty directories are represented by an empty file named "."
          collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
-        self.assertEqual(collection2["manifest_text"], "")
+        self.assertRegexpMatches(collection2["manifest_text"],
+                                 r'./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
  
          self.pool.apply(fuseRmTestHelperRmdir, (self.mounttmp,))
  
@@ -674,7 +675,7 @@ class FuseMvFileTest(MountTestBase):
  
          collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
          self.assertRegexpMatches(collection2["manifest_text"],
-            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt\n\./testdir d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:\\056\n')
  
  
  def fuseRenameTestHelper(mounttmp):
author	Tom Clegg <tclegg@veritasgenetics.com>
	Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
committer	Tom Clegg <tclegg@veritasgenetics.com>
	Fri, 1 Feb 2019 21:29:17 +0000 (16:29 -0500)
apps/workbench/app/assets/javascripts/components/edit_tags.js		patch \| blob \| history
apps/workbench/app/controllers/work_units_controller.rb		patch \| blob \| history
apps/workbench/app/models/collection.rb		patch \| blob \| history
build/run-tests.sh		patch \| blob \| history
lib/controller/handler_test.go		patch \| blob \| history
sdk/python/arvados/_normalize_stream.py		patch \| blob \| history
sdk/python/arvados/collection.py		patch \| blob \| history
sdk/python/tests/test_collections.py		patch \| blob \| history
sdk/ruby/lib/arvados/keep.rb		patch \| blob \| history
sdk/ruby/test/test_keep_manifest.rb		patch \| blob \| history
services/api/app/controllers/user_sessions_controller.rb		patch \| blob \| history
services/api/app/models/api_client_authorization.rb		patch \| blob \| history
services/api/test/functional/user_sessions_controller_test.rb		patch \| blob \| history
services/api/test/integration/user_sessions_test.rb		patch \| blob \| history
services/fuse/tests/test_mount.py		patch \| blob \| history