From: Peter Amstutz Date: Fri, 7 Jul 2017 15:27:07 +0000 (-0400) Subject: Merge branch '10584-fuse-stop-threads' refs #10584 X-Git-Tag: 1.1.0~150 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/1e31815d4a0d094633d4acb4f6265d6b8b6e3246?hp=a5b458cad80a34a6e90745d47c7e522cbbeedb6b Merge branch '10584-fuse-stop-threads' refs #10584 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/.licenseignore b/.licenseignore index 1d0356b51e..51980b16c2 100644 --- a/.licenseignore +++ b/.licenseignore @@ -1,7 +1,9 @@ *agpl-3.0.html *agpl-3.0.txt +apache-2.0.txt apps/workbench/app/assets/javascripts/list.js apps/workbench/public/webshell/* +AUTHORS */bootstrap.css */bootstrap.js *bootstrap-theme.css diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000000..ad72aea0aa --- /dev/null +++ b/AUTHORS @@ -0,0 +1,18 @@ +# Names should be added to this file with this pattern: +# +# For individuals: +# Name +# +# For organizations: +# Organization +# +# See python fnmatch module documentation for more information. + +Curoverse, Inc. <*@curoverse.com> +Adam Savitzky +Colin Nolan +David +Guillermo Carrasco +Joshua Randall +President and Fellows of Harvard College <*@harvard.edu> +Thomas Mooney diff --git a/COPYING b/COPYING index 2cba2ad20a..61c31397a0 100644 --- a/COPYING +++ b/COPYING @@ -1,15 +1,19 @@ -Server-side components of Arvados contained in the apps/ and services/ -directories, including the API Server, Workbench, and Crunch, are licensed -under the GNU Affero General Public License version 3 (see agpl-3.0.txt). +Unless indicated otherwise in the header of the file, the files in this +repository are distributed under one of three different licenses: AGPL-3.0, +Apache-2.0 or CC-BY-SA-3.0. -The files and directories under the build/, lib/ and tools/ directories are -licensed under the GNU Affero General Public License version 3 (see -agpl-3.0.txt). +Individual files contain an SPDX tag that indicates the license for the file. +These are the three tags in use: -The Arvados client Software Development Kits contained in the sdk/ directory, -example scripts in the crunch_scripts/ directory, the files and directories -under backports/ and docker/, and code samples in the Aravados documentation -are licensed under the Apache License, Version 2.0 (see LICENSE-2.0.txt). + SPDX-License-Identifier: AGPL-3.0 + SPDX-License-Identifier: Apache-2.0 + SPDX-License-Identifier: CC-BY-SA-3.0 -The Arvados Documentation located in the doc/ directory is licensed under the -Creative Commons Attribution-Share Alike 3.0 United States (see by-sa-3.0.txt). 
+This enables machine processing of license information based on the SPDX +License Identifiers that are available here: http://spdx.org/licenses/ + +The full license text for each license is available in this directory: + + AGPL-3.0: agpl-3.0.txt + Apache-2.0: apache-2.0.txt + CC-BY-SA-3.0: cc-by-sa-3.0.txt diff --git a/LICENSE-2.0.txt b/apache-2.0.txt similarity index 100% rename from LICENSE-2.0.txt rename to apache-2.0.txt diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb index 99065947b8..f8fcf5108f 100644 --- a/apps/workbench/app/controllers/collections_controller.rb +++ b/apps/workbench/app/controllers/collections_controller.rb @@ -115,6 +115,10 @@ class CollectionsController < ApplicationController end def show_file_links + if Rails.configuration.keep_web_url || Rails.configuration.keep_web_download_url + # show_file will redirect to keep-web's directory listing + return show_file + end Thread.current[:reader_tokens] = [params[:reader_token]] return if false.equal?(find_object_by_uuid) render layout: false @@ -283,7 +287,12 @@ class CollectionsController < ApplicationController helper_method :download_link def download_link - collections_url + "/download/#{@object.uuid}/#{@search_sharing.first.api_token}/" + token = @search_sharing.first.api_token + if Rails.configuration.keep_web_url || Rails.configuration.keep_web_download_url + keep_web_url(@object.uuid, nil, {path_token: token}) + else + collections_url + "/download/#{@object.uuid}/#{token}/" + end end def share @@ -444,7 +453,7 @@ class CollectionsController < ApplicationController uri.path += 't=' + opts[:path_token] + '/' end uri.path += '_/' - uri.path += URI.escape(file) + uri.path += URI.escape(file) if file query = Hash[URI.decode_www_form(uri.query || '')] { query_token: 'api_token', diff --git a/apps/workbench/app/controllers/repositories_controller.rb b/apps/workbench/app/controllers/repositories_controller.rb index 2a39a94018..5ca6f22b02 100644 --- a/apps/workbench/app/controllers/repositories_controller.rb +++ b/apps/workbench/app/controllers/repositories_controller.rb @@ -6,7 +6,7 @@ class RepositoriesController < ApplicationController before_filter :set_share_links, if: -> { defined? @object } def index_pane_list - %w(recent help) + %w(repositories help) end def show_pane_list @@ -36,4 +36,72 @@ class RepositoriesController < ApplicationController def show_commit @commit = params[:commit] end + + def all_repos + limit = params[:limit].andand.to_i || 100 + offset = params[:offset].andand.to_i || 0 + @filters = params[:filters] || [] + + if @filters.any? + owner_filter = @filters.select do |attr, op, val| + (attr == 'owner_uuid') + end + end + + if !owner_filter.andand.any? + filters = @filters + [["owner_uuid", "=", current_user.uuid]] + my_repos = Repository.all.order("name ASC").limit(limit).offset(offset).filter(filters).results + else # done fetching all owned repositories + my_repos = [] + end + + if !owner_filter.andand.any? 
# if this is a next-page request, the first page was still fetching "owned" repos + @filters = @filters.reject do |attr, op, val| + (attr == 'owner_uuid') or + (attr == 'name') or + (attr == 'uuid') + end + end + + filters = @filters + [["owner_uuid", "!=", current_user.uuid]] + other_repos = Repository.all.order("name ASC").limit(limit).offset(offset).filter(filters).results + + @objects = (my_repos + other_repos).first(limit) + end + + def find_objects_for_index + return if !params[:partial] + + all_repos + + if @objects.any? + @next_page_filters = next_page_filters('>=') + @next_page_href = url_for(partial: :repositories_rows, + filters: @next_page_filters.to_json) + else + @next_page_href = nil + end + end + + def next_page_href with_params={} + @next_page_href + end + + def next_page_filters nextpage_operator + next_page_filters = @filters.reject do |attr, op, val| + (attr == 'owner_uuid') or + (attr == 'name' and op == nextpage_operator) or + (attr == 'uuid' and op == 'not in') + end + + if @objects.any? + last_obj = @objects.last + next_page_filters += [['name', nextpage_operator, last_obj.name]] + next_page_filters += [['uuid', 'not in', [last_obj.uuid]]] + # if the last object is not owned by the current user, we are done with owned repos and are fetching other repos + next_page_filters += [['owner_uuid', '!=', last_obj.uuid]] if last_obj.owner_uuid != current_user.uuid + end + + next_page_filters + end end diff --git a/apps/workbench/app/controllers/users_controller.rb b/apps/workbench/app/controllers/users_controller.rb index 4a40f03120..2e3ced69a5 100644 --- a/apps/workbench/app/controllers/users_controller.rb +++ b/apps/workbench/app/controllers/users_controller.rb @@ -260,28 +260,6 @@ class UsersController < ApplicationController end end - def repositories - # all repositories accessible by current user - all_repositories = Hash[Repository.all.order('name asc').collect {|repo| [repo.uuid, repo]}] - - @my_repositories = [] # we want them ordered as owned and the rest - @repo_writable = {} - - # owned repos - all_repositories.each do |_, repo| - if repo.owner_uuid == current_user.uuid - @repo_writable[repo.uuid] = 'can_write' - @my_repositories << repo - end - end - - # rest of the repos - handled = @my_repositories.map(&:uuid) - all_repositories.each do |_, repo| - @my_repositories << repo if !repo.uuid.in?(handled) - end - end - def virtual_machines @my_vm_logins = {} Link.where(tail_uuid: @object.uuid, diff --git a/apps/workbench/app/views/collections/_show_tags.html.erb b/apps/workbench/app/views/collections/_show_tags.html.erb index fd734c82dc..afab5266e9 100644 --- a/apps/workbench/app/views/collections/_show_tags.html.erb +++ b/apps/workbench/app/views/collections/_show_tags.html.erb @@ -9,7 +9,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
<% if object.editable? %>

- Edit + Edit

<% end %> diff --git a/apps/workbench/app/views/layouts/body.html.erb b/apps/workbench/app/views/layouts/body.html.erb index 8073615c31..3315027b3d 100644 --- a/apps/workbench/app/views/layouts/body.html.erb +++ b/apps/workbench/app/views/layouts/body.html.erb @@ -66,11 +66,7 @@ SPDX-License-Identifier: AGPL-3.0 %> Virtual machines <% end %> -
  • - <%= link_to repositories_user_path(current_user), role: 'menu-item' do %> - Repositories - <% end %> -
  • +
  • Repositories
  • Current token
  • <%= link_to ssh_keys_user_path(current_user), role: 'menu-item' do %> diff --git a/apps/workbench/app/views/users/_add_repository_modal.html.erb b/apps/workbench/app/views/repositories/_add_repository_modal.html.erb similarity index 100% rename from apps/workbench/app/views/users/_add_repository_modal.html.erb rename to apps/workbench/app/views/repositories/_add_repository_modal.html.erb diff --git a/apps/workbench/app/views/repositories/_show_help.html.erb b/apps/workbench/app/views/repositories/_show_help.html.erb index 7980738b55..5904fb29db 100644 --- a/apps/workbench/app/views/repositories/_show_help.html.erb +++ b/apps/workbench/app/views/repositories/_show_help.html.erb @@ -2,7 +2,13 @@ SPDX-License-Identifier: AGPL-3.0 %> -<% if (example = @objects.select(&:push_url).first) %> +<% + filters = @filters + [["owner_uuid", "=", current_user.uuid]] + example = Repository.all.order("name ASC").filter(filters).limit(1).results.first + example = Repository.all.order("name ASC").limit(1).results.first if !example +%> + +<% if example %>

    Sample git quick start: diff --git a/apps/workbench/app/views/repositories/_show_repositories.html.erb b/apps/workbench/app/views/repositories/_show_repositories.html.erb new file mode 100644 index 0000000000..871ba1d365 --- /dev/null +++ b/apps/workbench/app/views/repositories/_show_repositories.html.erb @@ -0,0 +1,46 @@ +<%# Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: AGPL-3.0 %> + +<%= render partial: "add_repository_modal" %> + +

    +
    +
    +

    + When you are using an Arvados virtual machine, you should clone the https:// URLs. This will authenticate automatically using your API token. +

    +

    + In order to clone git repositories using SSH, <%= link_to ssh_keys_user_path(current_user) do%> add an SSH key to your account<%end%> and clone the git@ URLs. +

    +
    +
    + <%= link_to raw(' Add new repository'), "#", + {class: 'btn btn-xs btn-primary', 'data-toggle' => "modal", + 'data-target' => '#add-repository-modal'} %> +
    +
    + +
    + + + + + + + + + + + + + + + + + + +
    Name URL
    +
    +
    diff --git a/apps/workbench/app/views/repositories/_show_repositories_rows.html.erb b/apps/workbench/app/views/repositories/_show_repositories_rows.html.erb new file mode 100644 index 0000000000..fe88608d78 --- /dev/null +++ b/apps/workbench/app/views/repositories/_show_repositories_rows.html.erb @@ -0,0 +1,23 @@ +<%# Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: AGPL-3.0 %> + +<% @objects.each do |repo| %> + + + <%= render :partial => "show_object_button", :locals => {object: repo, size: 'xs' } %> + + + <%= repo[:name] %> + + + <%= repo.http_fetch_url %>
    + <%= repo.editable? ? repo.push_url : repo.fetch_url %> + + + <% if repo.editable? %> + <%= render partial: 'delete_object_button', locals: {object: repo} %> + <% end %> + + +<% end %> diff --git a/apps/workbench/app/views/users/_repositories.html.erb b/apps/workbench/app/views/users/_repositories.html.erb deleted file mode 100644 index 057b6a2138..0000000000 --- a/apps/workbench/app/views/users/_repositories.html.erb +++ /dev/null @@ -1,71 +0,0 @@ -<%# Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: AGPL-3.0 %> - -
    -
    -
    - <%= link_to raw(' Add new repository'), "#", - {class: 'btn btn-xs btn-primary', 'data-toggle' => "modal", - 'data-target' => '#add-repository-modal'} %> -
    -

    - <%= link_to repositories_user_path(current_user) do%> - Repositories - <%end%> -

    -
    - -
    -

    - When you are using an Arvados virtual machine, you should clone the https:// URLs. This will authenticate automatically using your API token. -

    -

    - In order to clone git repositories using SSH, <%= link_to ssh_keys_user_path(current_user) do%> add an SSH key to your account<%end%> and clone the git@ URLs. -

    - - <% if !@my_repositories.any? %> - You do not seem to have access to any repositories. If you would like to request access, please contact your system admin. - <% else %> - - - - - - - - - - - - - - - - - <% @my_repositories.andand.each do |repo| %> - - - - - - - <% end %> - -
    Name URL
    - <%= render :partial => "show_object_button", :locals => {object: repo, size: 'xs' } %> - - <%= repo[:name] %> - - <%= repo.http_fetch_url %>
    - <%= @repo_writable[repo.uuid] ? repo.push_url : repo.fetch_url %> -
    - <% if repo.editable? %> - <%= link_to(repository_path(id: repo.uuid), method: :delete, class: 'btn btn-sm', data: {confirm: "Really delete '#{repo.name || repo.uuid}'?"}) do %> - - <% end %> - <% end %> -
    - <% end %> -
    -
    diff --git a/apps/workbench/app/views/users/repositories.html.erb b/apps/workbench/app/views/users/repositories.html.erb deleted file mode 100644 index 8a1a3b99d3..0000000000 --- a/apps/workbench/app/views/users/repositories.html.erb +++ /dev/null @@ -1,6 +0,0 @@ -<%# Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: AGPL-3.0 %> - -<%= render :partial => 'repositories' %> -<%= render partial: "add_repository_modal" %> diff --git a/apps/workbench/test/controllers/repositories_controller_test.rb b/apps/workbench/test/controllers/repositories_controller_test.rb index 561723da54..99e7285b3b 100644 --- a/apps/workbench/test/controllers/repositories_controller_test.rb +++ b/apps/workbench/test/controllers/repositories_controller_test.rb @@ -125,4 +125,20 @@ class RepositoriesControllerTest < ActionController::TestCase assert_select 'tr td', 'COPYING' end end + + test "get repositories lists linked as well as owned repositories" do + params = { + partial: :repositories_rows, + format: :json, + } + get :index, params, session_for(:active) + assert_response :success + repos = assigns(:objects) + assert repos + assert_not_empty repos, "my_repositories should not be empty" + repo_uuids = repos.map(&:uuid) + assert_includes repo_uuids, api_fixture('repositories')['repository2']['uuid'] # owned by active + assert_includes repo_uuids, api_fixture('repositories')['repository4']['uuid'] # shared with active + assert_includes repo_uuids, api_fixture('repositories')['arvados']['uuid'] # shared with all_users + end end diff --git a/apps/workbench/test/controllers/users_controller_test.rb b/apps/workbench/test/controllers/users_controller_test.rb index ee64969f76..ce9282ff77 100644 --- a/apps/workbench/test/controllers/users_controller_test.rb +++ b/apps/workbench/test/controllers/users_controller_test.rb @@ -35,29 +35,6 @@ class UsersControllerTest < ActionController::TestCase assert_match /\/users\/welcome/, @response.redirect_url end - test "show repositories with read, write, or manage permission" do - get :repositories, {id: api_fixture("users")['active']['uuid']}, session_for(:active) - assert_response :success - repos = assigns(:my_repositories) - assert repos - assert_not_empty repos, "my_repositories should not be empty" - editables = repos.collect { |r| !!assigns(:repo_writable)[r.uuid] } - assert_includes editables, true, "should have a writable repository" - assert_includes editables, false, "should have a readonly repository" - end - - test "show repositories lists linked as well as owned repositories" do - get :repositories, {id: api_fixture("users")['active']['uuid']}, session_for(:active) - assert_response :success - repos = assigns(:my_repositories) - assert repos - assert_not_empty repos, "my_repositories should not be empty" - repo_uuids = repos.map(&:uuid) - assert_includes repo_uuids, api_fixture('repositories')['repository2']['uuid'] # owned by active - assert_includes repo_uuids, api_fixture('repositories')['repository4']['uuid'] # shared with active - assert_includes repo_uuids, api_fixture('repositories')['arvados']['uuid'] # shared with all_users - end - test "request shell access" do user = api_fixture('users')['spectator'] diff --git a/apps/workbench/test/integration/application_layout_test.rb b/apps/workbench/test/integration/application_layout_test.rb index e777ebe784..7692d8e5dc 100644 --- a/apps/workbench/test/integration/application_layout_test.rb +++ b/apps/workbench/test/integration/application_layout_test.rb @@ -56,7 +56,7 @@ class 
ApplicationLayoutTest < ActionDispatch::IntegrationTest assert_selector "a[href=\"/projects/#{user['uuid']}\"]", text: 'Home project' assert_selector "a[href=\"/users/#{user['uuid']}/virtual_machines\"]", text: 'Virtual machines' - assert_selector "a[href=\"/users/#{user['uuid']}/repositories\"]", text: 'Repositories' + assert_selector "a[href=\"/repositories\"]", text: 'Repositories' assert_selector "a[href=\"/current_token\"]", text: 'Current token' assert_selector "a[href=\"/users/#{user['uuid']}/ssh_keys\"]", text: 'SSH keys' @@ -214,7 +214,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest end [ - ['Repositories', nil, 's0uqq'], + ['Repositories', nil, 'active/crunchdispatchtest'], ['Virtual machines', nil, 'testvm.shell'], ['SSH keys', nil, 'public_key'], ['Links', nil, 'link_class'], diff --git a/apps/workbench/test/integration/collection_upload_test.rb b/apps/workbench/test/integration/collection_upload_test.rb index e54a5c2185..608cd521de 100644 --- a/apps/workbench/test/integration/collection_upload_test.rb +++ b/apps/workbench/test/integration/collection_upload_test.rb @@ -96,11 +96,11 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest test "Report network error" do need_selenium "to make file uploads work" use_token :admin do - # Even if you somehow do port>2^16, surely nx.example.net won't + # Even if port 0 is a thing, surely nx.example.net won't # respond KeepService.where(service_type: 'proxy').first. update_attributes(service_host: 'nx.example.net', - service_port: 99999) + service_port: 0) end visit page_with_token 'active', sandbox_path diff --git a/apps/workbench/test/integration/collections_test.rb b/apps/workbench/test/integration/collections_test.rb index af8e1313f8..8619858dfe 100644 --- a/apps/workbench/test/integration/collections_test.rb +++ b/apps/workbench/test/integration/collections_test.rb @@ -426,8 +426,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest end test "collection tags tab" do - need_selenium - visit page_with_token('active', '/collections/zzzzz-4zz18-bv31uwvy3neko21') click_link 'Tags' @@ -447,8 +445,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest assert_selector 'a', text: 'Cancel' # add two tags - first('.edit-collection-tags').click - first('.glyphicon-plus').click first('.collection-tag-field-key').click first('.collection-tag-field-key').set('key 1') diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb index 37faef9c3d..407458b62b 100644 --- a/apps/workbench/test/integration/download_test.rb +++ b/apps/workbench/test/integration/download_test.rb @@ -8,6 +8,8 @@ require 'helpers/download_helper' class DownloadTest < ActionDispatch::IntegrationTest include KeepWebConfig + @@wrote_test_data = false + setup do use_keep_web_config @@ -17,10 +19,13 @@ class DownloadTest < ActionDispatch::IntegrationTest # Keep data isn't populated by fixtures, so we have to write any # data we expect to read. - ['foo', 'w a z', "Hello world\n"].each do |data| - md5 = `echo -n #{data.shellescape} | arv-put --no-progress --raw -` - assert_match /^#{Digest::MD5.hexdigest(data)}/, md5 - assert $?.success?, $? + if !@@wrote_test_data + ['foo', 'w a z', "Hello world\n"].each do |data| + md5 = `echo -n #{data.shellescape} | arv-put --no-progress --raw -` + assert_match /^#{Digest::MD5.hexdigest(data)}/, md5 + assert $?.success?, $? 
+ end + @@wrote_test_data = true end end @@ -29,7 +34,7 @@ class DownloadTest < ActionDispatch::IntegrationTest uuid_or_pdh = api_fixture('collections')['foo_file'][id_type] token = api_fixture('api_client_authorizations')['active_all_collections']['api_token'] visit "/collections/download/#{uuid_or_pdh}/#{token}/" - within "#collection_files" do + within 'ul' do click_link 'foo' end assert_no_selector 'a' diff --git a/apps/workbench/test/integration/user_settings_menu_test.rb b/apps/workbench/test/integration/user_settings_menu_test.rb index 75009f709c..6a0e46e26a 100644 --- a/apps/workbench/test/integration/user_settings_menu_test.rb +++ b/apps/workbench/test/integration/user_settings_menu_test.rb @@ -114,7 +114,7 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest end test "verify repositories for active user" do - visit page_with_token('active',"/users/#{api_fixture('users')['active']['uuid']}/repositories") + visit page_with_token('active',"/repositories") repos = [[api_fixture('repositories')['foo'], true], [api_fixture('repositories')['repository3'], false], @@ -126,9 +126,9 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest assert_text repo['name'] assert_selector 'a', text:'Show' if owned - assert_not_nil first('.fa-trash-o') + assert_not_nil first('.glyphicon-trash') else - assert_nil first('.fa-trash-o') + assert_nil first('.glyphicon-trash') end end end @@ -193,12 +193,12 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest [ ['virtual_machines', nil, 'Host name', 'testvm2.shell'], - ['repositories', 'Add new repository', 'It may take a minute or two before you can clone your new repository.', 'active/foo'], + ['/repositories', 'Add new repository', 'It may take a minute or two before you can clone your new repository.', 'active/foo'], ['/current_token', nil, 'HISTIGNORE=$HISTIGNORE', 'ARVADOS_API_TOKEN=3kg6k6lzmp9kj5'], ['ssh_keys', 'Add new SSH key', 'Click here to learn about SSH keys in Arvados.', 'active'], ].each do |page_name, button_name, look_for, content| test "test user settings menu for page #{page_name}" do - if page_name == '/current_token' + if page_name == '/current_token' || page_name == '/repositories' visit page_with_token('active', page_name) else visit page_with_token('active', "/users/#{api_fixture('users')['active']['uuid']}/#{page_name}") @@ -216,7 +216,7 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest [ ['virtual_machines', 'You do not have access to any virtual machines.'], - ['/repositories', api_fixture('repositories')['arvados']['uuid']], + ['/repositories', api_fixture('repositories')['arvados']['name']], ['/current_token', 'HISTIGNORE=$HISTIGNORE'], ['ssh_keys', 'You have not yet set up an SSH public key for use with Arvados.'], ].each do |page_name, look_for| diff --git a/build/check-copyright-notices b/build/check-copyright-notices index cf4e9bf181..5298371bd1 100755 --- a/build/check-copyright-notices +++ b/build/check-copyright-notices @@ -173,7 +173,7 @@ ${cc} ${cc}${cc:+ }SPDX-License-Identifier: CC-BY-SA-3.0${ce}" found=$(head -n20 "$fnm" | egrep -A${grepAfter} -B${grepBefore} 'Copyright.*Arvados' || true) case ${fnm} in - Makefile | build/* | lib/* | tools/* | apps/* | services/*) + Makefile | build/* | lib/* | tools/* | apps/* | services/* | sdk/cli/bin/crunch-job) want=${wantGPL} ;; crunch_scripts/* | backports/* | docker/* | sdk/*) diff --git a/by-sa-3.0.txt b/cc-by-sa-3.0.txt similarity index 100% rename from by-sa-3.0.txt rename to cc-by-sa-3.0.txt diff --git 
a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance index bccfdac0b8..b66e9c0526 100755 --- a/sdk/cli/bin/arv-run-pipeline-instance +++ b/sdk/cli/bin/arv-run-pipeline-instance @@ -260,31 +260,46 @@ class JobCache [] end end + + # create() returns [job, exception]. If both job and exception are + # nil, there was a non-retryable error and the call should not be + # attempted again. def self.create(pipeline, component, job, create_params) @cache ||= {} body = {job: no_nil_values(job)}.merge(no_nil_values(create_params)) - result = $client.execute(:api_method => $arvados.jobs.create, - :body_object => body, - :authenticated => false, - :headers => { - authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] - }) - j = JSON.parse result.body, :symbolize_names => true - if j.is_a? Hash and j[:uuid] + result = nil + begin + result = $client.execute( + :api_method => $arvados.jobs.create, + :body_object => body, + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] + }) + if result.status == 429 || result.status >= 500 + raise Exception.new("HTTP status #{result.status}") + end + rescue Exception => e + return nil, e + end + j = JSON.parse(result.body, :symbolize_names => true) rescue nil + if result.status == 200 && j.is_a?(Hash) && j[:uuid] @cache[j[:uuid]] = j + return j, nil else - debuglog "create job: #{j[:errors] rescue nil} with attributes #{body}", 0 + errors = j[:errors] rescue [] + debuglog "create job: [#{result.status}] #{errors.inspect} with attributes #{body}", 0 msg = "" - j[:errors].each do |err| + errors.each do |err| msg += "Error creating job for component #{component}: #{err}\n" end msg += "Job submission was: #{body.to_json}" pipeline.log_stderr(msg) - nil + return nil, nil end end @@ -396,7 +411,10 @@ class WhRunPipelineInstance end end if !errors.empty? - abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nErrors:\n#{errors.collect { |c,p,e| "#{c}::#{p} - #{e}\n" }.join ""}" + all_errors = errors.collect do |c,p,e| + "#{c}::#{p} - #{e}\n" + end.join("") + abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nErrors:\n#{all_errors}" end debuglog "options=" + @options.pretty_inspect self @@ -463,7 +481,7 @@ class WhRunPipelineInstance # are fully specified (any output_of script_parameters are resolved # to real value) my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}" - job = JobCache.create(@instance, cname, { + job, err = JobCache.create(@instance, cname, { :script => c[:script], :script_parameters => Hash[c[:script_parameters].map do |key, spec| [key, spec[:value]] @@ -490,9 +508,11 @@ class WhRunPipelineInstance c[:job] = job c[:run_in_process] = (@options[:run_jobs_here] and job[:submit_id] == my_submit_id) - else + elsif err.nil? debuglog "component #{cname} new job failed", 0 job_creation_failed += 1 + else + debuglog "component #{cname} new job failed, err=#{err}", 0 end end @@ -657,7 +677,7 @@ class WhRunPipelineInstance @instance[:state] = 'Complete' else @instance[:state] = 'Paused' - end + end else if ended == @components.length or failed > 0 @instance[:state] = success ? 'Complete' : 'Failed' diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index c0e3075c9b..afca52cb73 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -1,7 +1,7 @@ #!/usr/bin/env perl # Copyright (C) The Arvados Authors. All rights reserved. 
# -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: AGPL-3.0 # -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*- diff --git a/sdk/cwl/test_with_arvbox.sh b/sdk/cwl/test_with_arvbox.sh index 88860c04eb..236658c1f1 100755 --- a/sdk/cwl/test_with_arvbox.sh +++ b/sdk/cwl/test_with_arvbox.sh @@ -48,6 +48,8 @@ if test -z "$ARVBOX_CONTAINER" ; then fi if test $reset_container = 1 ; then + arvbox stop + docker rm $ARVBOX_CONTAINER arvbox reset -f fi @@ -81,7 +83,7 @@ export ARVADOS_API_TOKEN=\$(cat /var/lib/arvados/superuser_token) if test "$tag" = "latest" ; then arv-keepdocker --pull arvados/jobs $tag else - jobsimg=$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])") + jobsimg=\$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])") arv-keepdocker --pull arvados/jobs $jobsimg docker tag -f arvados/jobs:$jobsimg arvados/jobs:latest arv-keepdocker arvados/jobs latest diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl index 46771d1101..4c95eb6817 100644 --- a/sdk/cwl/tests/noreuse.cwl +++ b/sdk/cwl/tests/noreuse.cwl @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + cwlVersion: v1.0 class: Workflow $namespaces: diff --git a/sdk/cwl/tests/stdout.cwl b/sdk/cwl/tests/stdout.cwl index c76a6edec4..2100c37ff1 100644 --- a/sdk/cwl/tests/stdout.cwl +++ b/sdk/cwl/tests/stdout.cwl @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + cwlVersion: v1.0 class: CommandLineTool baseCommand: echo diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index 548f4b0948..68f63b1c26 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -14,6 +14,7 @@ import copy import datetime import errno import fcntl +import fnmatch import hashlib import json import logging @@ -160,6 +161,18 @@ run_opts.add_argument('--name', help=""" Save the collection with the specified name. """) +run_opts.add_argument('--exclude', metavar='PATTERN', default=[], + action='append', help=""" +Exclude files and directories whose names match the given glob pattern. When +using a path-like pattern like 'subdir/*.txt', all text files inside the 'subdir' +directory (relative to the provided input dirs) will be excluded. +When using a filename pattern like '*.txt', any text file will be excluded +no matter where it is placed. +For the special case of needing to exclude only files or dirs directly below +the given input directory, you can use a pattern like './exclude_this.gif'. +You can specify multiple patterns by using this argument more than once.
+""") + _group = run_opts.add_mutually_exclusive_group() _group.add_argument('--progress', action='store_true', help=""" @@ -248,6 +261,10 @@ def parse_arguments(arguments): if not args.filename: args.filename = 'stdin' + # Remove possible duplicated patterns + if len(args.exclude) > 0: + args.exclude = list(set(args.exclude)) + return args @@ -376,18 +393,20 @@ class ArvPutUploadJob(object): } def __init__(self, paths, resume=True, use_cache=True, reporter=None, - bytes_expected=None, name=None, owner_uuid=None, + name=None, owner_uuid=None, ensure_unique_name=False, num_retries=None, put_threads=None, replication_desired=None, filename=None, update_time=60.0, update_collection=None, logger=logging.getLogger('arvados.arv_put'), dry_run=False, - follow_links=True): + follow_links=True, exclude_paths=[], exclude_names=None): self.paths = paths self.resume = resume self.use_cache = use_cache self.update = False self.reporter = reporter - self.bytes_expected = bytes_expected + # This will set to 0 before start counting, if no special files are going + # to be read. + self.bytes_expected = None self.bytes_written = 0 self.bytes_skipped = 0 self.name = name @@ -415,6 +434,8 @@ class ArvPutUploadJob(object): self.dry_run = dry_run self._checkpoint_before_quit = True self.follow_links = follow_links + self.exclude_paths = exclude_paths + self.exclude_names = exclude_names if not self.use_cache and self.resume: raise ArvPutArgumentConflict('resume cannot be True when use_cache is False') @@ -426,55 +447,101 @@ class ArvPutUploadJob(object): # Load cached data if any and if needed self._setup_state(update_collection) + # Build the upload file list, excluding requested files and counting the + # bytes expected to be uploaded. + self._build_upload_list() + + def _build_upload_list(self): + """ + Scan the requested paths to count file sizes, excluding files & dirs if requested + and building the upload file list. + """ + # If there aren't special files to be read, reset total bytes count to zero + # to start counting. + if not any([p for p in self.paths + if not (os.path.isfile(p) or os.path.isdir(p))]): + self.bytes_expected = 0 + + for path in self.paths: + # Test for stdin first, in case some file named '-' exist + if path == '-': + if self.dry_run: + raise ArvPutUploadIsPending() + self._write_stdin(self.filename or 'stdin') + elif not os.path.exists(path): + raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path)) + elif os.path.isdir(path): + # Use absolute paths on cache index so CWD doesn't interfere + # with the caching logic. + orig_path = path + path = os.path.abspath(path) + if orig_path[-1:] == os.sep: + # When passing a directory reference with a trailing slash, + # its contents should be uploaded directly to the + # collection's root. + prefixdir = path + else: + # When passing a directory reference with no trailing slash, + # upload the directory to the collection's root. 
+ prefixdir = os.path.dirname(path) + prefixdir += os.sep + for root, dirs, files in os.walk(path, + followlinks=self.follow_links): + root_relpath = os.path.relpath(root, path) + if root_relpath == '.': + root_relpath = '' + # Exclude files/dirs by full path matching pattern + if self.exclude_paths: + dirs[:] = [d for d in dirs + if not any(pathname_match( + os.path.join(root_relpath, d), pat) + for pat in self.exclude_paths)] + files = [f for f in files + if not any(pathname_match( + os.path.join(root_relpath, f), pat) + for pat in self.exclude_paths)] + # Exclude files/dirs by name matching pattern + if self.exclude_names is not None: + dirs[:] = [d for d in dirs + if not self.exclude_names.match(d)] + files = [f for f in files + if not self.exclude_names.match(f)] + # Make os.walk()'s dir traversing order deterministic + dirs.sort() + files.sort() + for f in files: + filepath = os.path.join(root, f) + # Add its size to the total bytes count (if applicable) + if self.follow_links or (not os.path.islink(filepath)): + if self.bytes_expected is not None: + self.bytes_expected += os.path.getsize(filepath) + self._check_file(filepath, + os.path.join(root[len(prefixdir):], f)) + else: + filepath = os.path.abspath(path) + # Add its size to the total bytes count (if applicable) + if self.follow_links or (not os.path.islink(filepath)): + if self.bytes_expected is not None: + self.bytes_expected += os.path.getsize(filepath) + self._check_file(filepath, + self.filename or os.path.basename(path)) + # If dry-run mode is on and we got up to this point, then we should + # notify that there aren't any files to upload. + if self.dry_run: + raise ArvPutUploadNotPending() + # Remove local_collection's files that don't exist locally anymore, so the + # bytes_written count is correct. + for f in self.collection_file_paths(self._local_collection, + path_prefix=""): + if f != 'stdin' and f != self.filename and not f in self._file_paths: + self._local_collection.remove(f) + def start(self, save_collection): """ Start supporting thread & file uploading """ - if not self.dry_run: - self._checkpointer.start() + self._checkpointer.start() try: - for path in self.paths: - # Test for stdin first, in case some file named '-' exist - if path == '-': - if self.dry_run: - raise ArvPutUploadIsPending() - self._write_stdin(self.filename or 'stdin') - elif not os.path.exists(path): - raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path)) - elif os.path.isdir(path): - # Use absolute paths on cache index so CWD doesn't interfere - # with the caching logic. - orig_path = path - path = os.path.abspath(path) - if orig_path[-1:] == os.sep: - # When passing a directory reference with a trailing slash, - # its contents should be uploaded directly to the collection's root. - prefixdir = path - else: - # When passing a directory reference with no trailing slash, - # upload the directory to the collection's root. - prefixdir = os.path.dirname(path) - prefixdir += os.sep - for root, dirs, files in os.walk(path, followlinks=self.follow_links): - # Make os.walk()'s dir traversing order deterministic - dirs.sort() - files.sort() - for f in files: - self._check_file(os.path.join(root, f), - os.path.join(root[len(prefixdir):], f)) - else: - self._check_file(os.path.abspath(path), - self.filename or os.path.basename(path)) - # If dry-mode is on, and got up to this point, then we should notify that - # there aren't any file to upload.
- if self.dry_run: - raise ArvPutUploadNotPending() - # Remove local_collection's files that don't exist locally anymore, so the - # bytes_written count is correct. - for f in self.collection_file_paths(self._local_collection, - path_prefix=""): - if f != 'stdin' and f != self.filename and not f in self._file_paths: - self._local_collection.remove(f) # Update bytes_written from current local collection and # report initial progress. self._update() @@ -660,7 +727,13 @@ class ArvPutUploadJob(object): should_upload = True if should_upload: - self._files_to_upload.append((source, resume_offset, filename)) + try: + self._files_to_upload.append((source, resume_offset, filename)) + except ArvPutUploadIsPending: + # This can happen when running in dry-run mode; close the cache + # file to avoid locking issues. + self._cache_file.close() + raise def _upload_files(self): for source, resume_offset, filename in self._files_to_upload: @@ -839,29 +912,24 @@ class ArvPutUploadJob(object): datablocks = self._datablocks_on_item(self._my_collection()) return datablocks - -def expected_bytes_for(pathlist, follow_links=True): - # Walk the given directory trees and stat files, adding up file sizes, - # so we can display progress as percent - bytesum = 0 - for path in pathlist: - if os.path.isdir(path): - for root, dirs, files in os.walk(path, followlinks=follow_links): - # Sum file sizes - for f in files: - filepath = os.path.join(root, f) - # Ignore symlinked files when requested - if (not follow_links) and os.path.islink(filepath): - continue - bytesum += os.path.getsize(filepath) - elif not os.path.isfile(path): - return None - else: - bytesum += os.path.getsize(path) - return bytesum - _machine_format = "{} {}: {{}} written {{}} total\n".format(sys.argv[0], os.getpid()) + +# Simulate glob.glob() matching behavior without the need to scan the filesystem +# Note: fnmatch() doesn't work correctly when used with pathnames. For example, the +# pattern 'tests/*.py' will match 'tests/run_test.py' and also 'tests/subdir/run_test.py', +# so instead we're using it on every path component. +def pathname_match(pathname, pattern): + name = pathname.split(os.sep) + # Fix patterns like 'some/subdir/' or 'some//subdir' + pat = [x for x in pattern.split(os.sep) if x != '' and x != '.'] + if len(name) != len(pat): + return False + for i in range(len(name)): + if not fnmatch.fnmatch(name[i], pat[i]): + return False + return True + def machine_progress(bytes_written, bytes_expected): return _machine_format.format( bytes_written, -1 if (bytes_expected is None) else bytes_expected) @@ -937,19 +1005,55 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): else: reporter = None + # Set up the exclude regexes from all the --exclude arguments provided + name_patterns = [] + exclude_paths = [] + exclude_names = None + if len(args.exclude) > 0: + # We're supporting 2 kinds of exclusion patterns: + # 1) --exclude '*.jpg' (file/dir name patterns, will only match + # the name, wherever the file is on the tree) + # 2.1) --exclude 'foo/bar' (file/dir path patterns, will match the + # entire path, and should be relative to + # any input dir argument) + # 2.2) --exclude './*.jpg' (Special case for excluding files/dirs + # placed directly underneath the input dir) + for p in args.exclude: + # Only relative path patterns are allowed + if p.startswith(os.sep): + logger.error("Cannot use absolute paths with --exclude") + sys.exit(1) + if os.path.dirname(p): + # We don't support path patterns with '..' + p_parts = p.split(os.sep) + if '..'
in p_parts: + logger.error( + "Cannot use path patterns that include '..'") + sys.exit(1) + # Path search pattern + exclude_paths.append(p) + else: + # Name-only search pattern + name_patterns.append(p) + # For name-only matching, we can combine all patterns into a single + # regexp, for better performance. + exclude_names = re.compile('|'.join( + [fnmatch.translate(p) for p in name_patterns] + )) if len(name_patterns) > 0 else None + # Show the user the patterns to be used, just in case they weren't + # specified inside quotes and got changed by the shell expansion. + logger.info("Exclude patterns: {}".format(args.exclude)) + # If this is used by a human, and there's at least one directory to be # uploaded, the expected bytes calculation can take a moment. if args.progress and any([os.path.isdir(f) for f in args.paths]): logger.info("Calculating upload size, this could take some time...") - bytes_expected = expected_bytes_for(args.paths, follow_links=args.follow_links) - try: writer = ArvPutUploadJob(paths = args.paths, resume = args.resume, use_cache = args.use_cache, filename = args.filename, reporter = reporter, - bytes_expected = bytes_expected, num_retries = args.retries, replication_desired = args.replication, put_threads = args.threads, @@ -959,7 +1063,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): update_collection = args.update_collection, logger=logger, dry_run=args.dry_run, - follow_links=args.follow_links) + follow_links=args.follow_links, + exclude_paths=exclude_paths, + exclude_names=exclude_names) except ResumeCacheConflict: logger.error("\n".join([ "arv-put: Another process is already uploading this data.", @@ -975,6 +1081,10 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): except ArvPutUploadNotPending: # No files pending for upload sys.exit(0) + except PathDoesNotExistError as error: + logger.error("\n".join([ + "arv-put: %s" % str(error)])) + sys.exit(1) # Install our signal handler for each code in CAUGHT_SIGNALS, and save # the originals. @@ -995,16 +1105,6 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): logger.error("\n".join([ "arv-put: %s" % str(error)])) sys.exit(1) - except ArvPutUploadIsPending: - # Dry run check successful, return proper exit code. - sys.exit(2) - except ArvPutUploadNotPending: - # No files pending for upload - sys.exit(0) - except PathDoesNotExistError as error: - logger.error("\n".join([ - "arv-put: %s" % str(error)])) - sys.exit(1) if args.progress: # Print newline to split stderr from stdout for humans. logger.info("\n") diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py index f571bdbfdd..57efb97c48 100644 --- a/sdk/python/tests/run_test_server.py +++ b/sdk/python/tests/run_test_server.py @@ -497,14 +497,15 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2): 'keep_disk': {'keep_service_uuid': svc['uuid'] } }).execute() - # If keepproxy is running, send SIGHUP to make it discover the new - # keepstore services. - proxypidfile = _pidfile('keepproxy') - if os.path.exists(proxypidfile): - try: - os.kill(int(open(proxypidfile).read()), signal.SIGHUP) - except OSError: - os.remove(proxypidfile) + # If keepproxy and/or keep-web is running, send SIGHUP to make + # them discover the new keepstore services.
+ for svc in ('keepproxy', 'keep-web'): + pidfile = _pidfile(svc) + if os.path.exists(pidfile): + try: + os.kill(int(open(pidfile).read()), signal.SIGHUP) + except OSError: + os.remove(pidfile) def _stop_keep(n): kill_server_pid(_pidfile('keep{}'.format(n))) diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index 9c325c8083..b8065ef3aa 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -303,9 +303,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, def test_passing_nonexistant_path_raise_exception(self): uuid_str = str(uuid.uuid4()) - cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) with self.assertRaises(arv_put.PathDoesNotExistError): - cwriter.start(save_collection=False) + cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) def test_writer_works_without_cache(self): cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False) @@ -340,7 +339,8 @@ for expect_count in (None, 8): progression, reporter = self.make_progress_tester() cwriter = arv_put.ArvPutUploadJob([f.name], - reporter=reporter, bytes_expected=expect_count) + reporter=reporter) + cwriter.bytes_expected = expect_count cwriter.start(save_collection=False) cwriter.destroy_cache() self.assertIn((3, expect_count), progression) @@ -496,23 +496,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, self.assertGreater(writer.bytes_written, 0) self.assertLess(writer.bytes_written, os.path.getsize(self.large_file_name)) - # Retry the upload using dry_run to check if there is a pending upload - writer2 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadIsPending): - writer2.start(save_collection=False) + # Retry the upload using dry_run to check if there is a pending upload + writer2 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Complete the pending upload writer3 = arv_put.ArvPutUploadJob([self.large_file_name], replication_desired=1) writer3.start(save_collection=False) - # Confirm there's no pending upload with dry_run=True - writer4 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadNotPending): - writer4.start(save_collection=False) - writer4.destroy_cache() + # Confirm there's no pending upload with dry_run=True + writer4 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Test obvious cases with self.assertRaises(arv_put.ArvPutUploadIsPending): arv_put.ArvPutUploadJob([self.large_file_name], @@ -531,21 +528,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase): TEST_SIZE = os.path.getsize(__file__) def test_expected_bytes_for_file(self): + writer = arv_put.ArvPutUploadJob([__file__]) self.assertEqual(self.TEST_SIZE, - arv_put.expected_bytes_for([__file__])) + writer.bytes_expected) def test_expected_bytes_for_tree(self): tree = self.make_tmpdir() shutil.copyfile(__file__, os.path.join(tree, 'one')) shutil.copyfile(__file__, os.path.join(tree, 'two')) + + writer = arv_put.ArvPutUploadJob([tree]) self.assertEqual(self.TEST_SIZE * 2, - arv_put.expected_bytes_for([tree])) + writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([tree, __file__]) self.assertEqual(self.TEST_SIZE * 3, - arv_put.expected_bytes_for([tree, __file__])) +
writer.bytes_expected) def test_expected_bytes_for_device(self): - self.assertIsNone(arv_put.expected_bytes_for(['/dev/null'])) - self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null'])) + writer = arv_put.ArvPutUploadJob(['/dev/null']) + self.assertIsNone(writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([__file__, '/dev/null']) + self.assertIsNone(writer.bytes_expected) class ArvadosPutReportTest(ArvadosBaseTestCase): @@ -673,6 +676,13 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, self.call_main_with_args, ['--project-uuid', self.Z_UUID, '--stream']) + def test_error_when_excluding_absolute_path(self): + tmpdir = self.make_tmpdir() + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--exclude', '/some/absolute/path/*', + tmpdir]) + def test_api_error_handling(self): coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()') coll_save_mock.side_effect = arvados.errors.ApiError( @@ -915,6 +925,50 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, '--project-uuid', self.PROJECT_UUID]) self.assertEqual(link_name, collection['name']) + def test_exclude_filename_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', '*2.txt', + '--exclude', 'file3.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Neither file2.txt nor file3.txt should have been uploaded + self.assertRegex(c['manifest_text'], r'^.*:file1.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_exclude_filepath_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', 'subdir/*2.txt', + '--exclude', './file1.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded + self.assertNotRegex(c['manifest_text'], + r'^\./%s.*:file1.txt' % os.path.basename(tmpdir)) + self.assertNotRegex(c['manifest_text'], + r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], + r'^\./%s.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], r'^.*:file3.txt') + if __name__ == '__main__': unittest.main() diff --git a/services/api/Rakefile b/services/api/Rakefile index 9d3d8f314c..fad803cb94 100644 --- a/services/api/Rakefile +++ b/services/api/Rakefile @@ -37,10 +37,17 @@ namespace :db do require 'tempfile' origfnm = File.expand_path('../db/structure.sql', __FILE__) tmpfnm = Tempfile.new 'structure.sql', File.expand_path('..', origfnm) + copyright_done = false begin tmpfile = File.new tmpfnm, 'w' origfile = File.new origfnm origfile.each_line do |line| + if
!/Copyright .* Arvados/.match(line) + tmpfile.write "-- Copyright (C) The Arvados Authors. All rights reserved.\n--\n-- SPDX-License-Identifier: AGPL-3.0\n\n" + end + copyright_done = true + end if /^SET lock_timeout = 0;/ =~ line # Avoid edit wars between versions that do/don't write this line. next diff --git a/services/api/db/migrate/20170628185847_jobs_yaml_to_json.rb b/services/api/db/migrate/20170628185847_jobs_yaml_to_json.rb new file mode 100644 index 0000000000..2c90c9a04b --- /dev/null +++ b/services/api/db/migrate/20170628185847_jobs_yaml_to_json.rb @@ -0,0 +1,21 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +require 'migrate_yaml_to_json' + +class JobsYamlToJson < ActiveRecord::Migration + def up + [ + 'components', + 'script_parameters', + 'runtime_constraints', + 'tasks_summary', + ].each do |column| + MigrateYAMLToJSON.migrate("jobs", column) + end + end + + def down + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index b38cba8109..7e6bb1dca4 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -2789,3 +2789,5 @@ INSERT INTO schema_migrations (version) VALUES ('20170419173712'); INSERT INTO schema_migrations (version) VALUES ('20170419175801'); +INSERT INTO schema_migrations (version) VALUES ('20170628185847'); + diff --git a/services/api/lib/migrate_yaml_to_json.rb b/services/api/lib/migrate_yaml_to_json.rb new file mode 100644 index 0000000000..1db7ed0113 --- /dev/null +++ b/services/api/lib/migrate_yaml_to_json.rb @@ -0,0 +1,32 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +module MigrateYAMLToJSON + def self.migrate(table, column) + conn = ActiveRecord::Base.connection + n = conn.update( + "UPDATE #{table} SET #{column}=$1 WHERE #{column}=$2", + "#{table}.#{column} convert YAML to JSON", + [[nil, "{}"], [nil, "--- {}\n"]]) + Rails.logger.info("#{table}.#{column}: #{n} rows updated using empty hash") + finished = false + while !finished + n = 0 + conn.exec_query( + "SELECT id, #{column} FROM #{table} WHERE #{column} LIKE $1 LIMIT 100", + "#{table}.#{column} check for YAML", + [[nil, '---%']], + ).rows.map do |id, yaml| + n += 1 + json = SafeJSON.dump(YAML.load(yaml)) + conn.exec_query( + "UPDATE #{table} SET #{column}=$1 WHERE id=$2 AND #{column}=$3", + "#{table}.#{column} convert YAML to JSON", + [[nil, json], [nil, id], [nil, yaml]]) + end + Rails.logger.info("#{table}.#{column}: #{n} rows updated") + finished = (n == 0) + end + end +end diff --git a/services/crunch-run/logging.go b/services/crunch-run/logging.go index 84f2f2fd63..0083f0999c 100644 --- a/services/crunch-run/logging.go +++ b/services/crunch-run/logging.go @@ -373,7 +373,7 @@ func (arvlog *ArvLogWriter) rateLimit(line []byte, now time.Time) (bool, []byte) } } -// load the rate limit discovery config paramters +// load the rate limit discovery config parameters func loadLogThrottleParams(clnt IArvadosClient) { param, err := clnt.Discovery("crunchLimitLogBytesPerJob") if err == nil { diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 11d26adfb8..30ae6b40e0 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -679,6 +679,7 @@ class TagsDirectory(Directory): self.num_retries = num_retries self._poll = True self._poll_time = poll_time + self._extra = set() def want_event_subscribe(self): return True @@ -687,15 +688,41 @@ class 
TagsDirectory(Directory): def update(self): with llfuse.lock_released: tags = self.api.links().list( - filters=[['link_class', '=', 'tag']], - select=['name'], distinct=True + filters=[['link_class', '=', 'tag'], ["name", "!=", ""]], + select=['name'], distinct=True, limit=1000 ).execute(num_retries=self.num_retries) if "items" in tags: - self.merge(tags['items'], + self.merge(tags['items']+[{"name": n} for n in self._extra], lambda i: i['name'], lambda a, i: a.tag == i['name'], lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time)) + @use_counter + @check_update + def __getitem__(self, item): + if super(TagsDirectory, self).__contains__(item): + return super(TagsDirectory, self).__getitem__(item) + with llfuse.lock_released: + tags = self.api.links().list( + filters=[['link_class', '=', 'tag'], ['name', '=', item]], limit=1 + ).execute(num_retries=self.num_retries) + if tags["items"]: + self._extra.add(item) + self.update() + return super(TagsDirectory, self).__getitem__(item) + + @use_counter + @check_update + def __contains__(self, k): + if super(TagsDirectory, self).__contains__(k): + return True + try: + self[k] + return True + except KeyError: + pass + return False + class TagDirectory(Directory): """A special directory that contains as subdirectories all collections visible diff --git a/services/ws/session_v0_test.go b/services/ws/session_v0_test.go index 0ae8ceb1c4..1213be5d14 100644 --- a/services/ws/session_v0_test.go +++ b/services/ws/session_v0_test.go @@ -83,28 +83,31 @@ func (s *v0Suite) TestLastLogID(c *check.C) { }), check.IsNil) s.expectStatus(c, r, 200) + avoidRace := make(chan struct{}, cap(uuidChan)) go func() { + // When last_log_id is given, although v0session sends + // old events in order, and sends new events in order, + // it doesn't necessarily finish sending all old + // events before sending any new events. To avoid + // hitting this bug in the test, we wait for the old + // events to arrive before emitting any new events. 
+ <-avoidRace s.emitEvents(uuidChan) close(uuidChan) }() - done := make(chan bool) go func() { for uuid := range uuidChan { for _, etype := range []string{"create", "blip", "update"} { lg := s.expectLog(c, r) - c.Check(lg.ObjectUUID, check.Equals, uuid) + for lg.ObjectUUID != uuid { + lg = s.expectLog(c, r) + } c.Check(lg.EventType, check.Equals, etype) } + avoidRace <- struct{}{} } - close(done) }() - - select { - case <-time.After(10 * time.Second): - c.Fatal("timeout") - case <-done: - } } func (s *v0Suite) TestPermission(c *check.C) { @@ -117,16 +120,21 @@ func (s *v0Suite) TestPermission(c *check.C) { }), check.IsNil) s.expectStatus(c, r, 200) - uuidChan := make(chan string, 1) + uuidChan := make(chan string, 2) go func() { s.token = arvadostest.AdminToken - s.emitEvents(nil) + s.emitEvents(uuidChan) s.token = arvadostest.ActiveToken s.emitEvents(uuidChan) }() + wrongUUID := <-uuidChan + rightUUID := <-uuidChan lg := s.expectLog(c, r) - c.Check(lg.ObjectUUID, check.Equals, <-uuidChan) + for lg.ObjectUUID != rightUUID { + c.Check(lg.ObjectUUID, check.Not(check.Equals), wrongUUID) + lg = s.expectLog(c, r) + } } func (s *v0Suite) TestSendBadJSON(c *check.C) { @@ -170,7 +178,9 @@ func (s *v0Suite) TestSubscribe(c *check.C) { for _, etype := range []string{"create", "blip", "update"} { lg := s.expectLog(c, r) - c.Check(lg.ObjectUUID, check.Equals, uuid) + for lg.ObjectUUID != uuid { + lg = s.expectLog(c, r) + } c.Check(lg.EventType, check.Equals, etype) } } @@ -228,8 +238,17 @@ func (s *v0Suite) expectStatus(c *check.C, r *json.Decoder, status int) { func (s *v0Suite) expectLog(c *check.C, r *json.Decoder) *arvados.Log { lg := &arvados.Log{} - c.Check(r.Decode(lg), check.IsNil) - return lg + ok := make(chan struct{}) + go func() { + c.Check(r.Decode(lg), check.IsNil) + close(ok) + }() + select { + case <-time.After(10 * time.Second): + panic("timed out") + case <-ok: + return lg + } } func (s *v0Suite) testClient() (*server, *websocket.Conn, *json.Decoder, *json.Encoder) {
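
A note on the licensing change above: COPYING now says that every file carries one of three SPDX tags. A quick way to sanity-check a working tree for files missing a tag is sketched below (a hypothetical helper, not part of this commit; the real checker is build/check-copyright-notices, which also honors the .licenseignore list updated at the top of this patch):

    import os

    LICENSE_TAGS = ('AGPL-3.0', 'Apache-2.0', 'CC-BY-SA-3.0')

    def spdx_tag(path, max_lines=20):
        # Return the SPDX identifier declared near the top of path, or None.
        try:
            with open(path) as f:
                for lineno, line in enumerate(f):
                    if lineno >= max_lines:
                        break
                    if 'SPDX-License-Identifier:' in line:
                        rest = line.split('SPDX-License-Identifier:', 1)[1].split()
                        return rest[0] if rest else None
        except (IOError, UnicodeDecodeError):
            pass
        return None

    for root, dirs, files in os.walk('.'):
        dirs[:] = [d for d in dirs if d != '.git']
        for name in files:
            path = os.path.join(root, name)
            if spdx_tag(path) not in LICENSE_TAGS:
                print('missing or unknown SPDX tag: %s' % path)

Taking only the first whitespace-separated token keeps ERB headers like "SPDX-License-Identifier: AGPL-3.0 %>" from being misread.
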
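On the arv-put changes: pathname_match(), added in put.py above, is what makes path-style --exclude patterns behave like glob.glob() without scanning the filesystem. Plain fnmatch() on a whole path would let 'tests/*.py' match 'tests/subdir/run_test.py' as well, so the pattern is instead applied one path component at a time. A standalone illustration (the function is copied from the diff, lightly condensed):

    import fnmatch
    import os

    def pathname_match(pathname, pattern):
        name = pathname.split(os.sep)
        # Drop empty components and '.' so 'some//subdir' and './x' are handled
        pat = [x for x in pattern.split(os.sep) if x != '' and x != '.']
        if len(name) != len(pat):
            return False
        # Apply fnmatch() component by component, not to the whole path
        return all(fnmatch.fnmatch(n, p) for n, p in zip(name, pat))

    assert pathname_match('tests/run_test.py', 'tests/*.py')
    assert not pathname_match('tests/subdir/run_test.py', 'tests/*.py')
    assert pathname_match('file1.txt', './file1.*')  # the top-level-only form
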
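Name-only patterns take the other branch in main(): they are combined into a single regexp with fnmatch.translate(), so each directory entry is tested with one match() call instead of one fnmatch() call per pattern. Mirroring the integration-test fixtures above, where roughly `arv-put --no-progress --exclude '*2.txt' --exclude 'file3.*' tmpdir` skips file2.txt and file3.txt at every level:

    import fnmatch
    import re

    name_patterns = ['*2.txt', 'file3.*']
    exclude_names = re.compile('|'.join(
        fnmatch.translate(p) for p in name_patterns))

    assert exclude_names.match('file2.txt')
    assert exclude_names.match('file3.txt')
    assert not exclude_names.match('file1.txt')
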
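Finally, the TagsDirectory change above caps the directory listing at 1000 tags, but the new __getitem__/__contains__ overrides mean a lookup of an unlisted tag still works: a miss triggers a targeted links().list(..., limit=1) query, and a hit is remembered in _extra so later update() calls keep the entry visible. A schematic of that pattern, detached from FUSE (FakeAPI is hypothetical, standing in for the links API):

    class FakeAPI(object):
        # Hypothetical stand-in for the tag-listing API.
        def __init__(self, tags, page_limit):
            self.tags, self.page_limit = tags, page_limit

        def list_names(self):
            return self.tags[:self.page_limit]  # truncated listing

        def exists(self, name):
            return name in self.tags  # targeted lookup, like limit=1

    class TagsView(object):
        def __init__(self, api):
            self.api = api
            self._entries = {}
            self._extra = set()  # names found only via targeted lookups
            self.update()

        def update(self):
            for name in set(self.api.list_names()) | self._extra:
                self._entries.setdefault(name, name)

        def __getitem__(self, name):
            if name not in self._entries and self.api.exists(name):
                self._extra.add(name)  # stays visible on future updates
                self.update()
            return self._entries[name]  # KeyError if truly absent

        def __contains__(self, name):
            try:
                self[name]
                return True
            except KeyError:
                return False

    api = FakeAPI(['tag%d' % i for i in range(1500)], page_limit=1000)
    view = TagsView(api)
    assert 'tag1400' in view  # beyond the page limit, found on demand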