From: Peter Amstutz Date: Fri, 7 Jul 2017 15:27:07 +0000 (-0400) Subject: Merge branch '10584-fuse-stop-threads' refs #10584 X-Git-Tag: 1.1.0~150 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/1e31815d4a0d094633d4acb4f6265d6b8b6e3246?hp=a5b458cad80a34a6e90745d47c7e522cbbeedb6b Merge branch '10584-fuse-stop-threads' refs #10584 Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/.licenseignore b/.licenseignore index 1d0356b51e..51980b16c2 100644 --- a/.licenseignore +++ b/.licenseignore @@ -1,7 +1,9 @@ *agpl-3.0.html *agpl-3.0.txt +apache-2.0.txt apps/workbench/app/assets/javascripts/list.js apps/workbench/public/webshell/* +AUTHORS */bootstrap.css */bootstrap.js *bootstrap-theme.css diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000000..ad72aea0aa --- /dev/null +++ b/AUTHORS @@ -0,0 +1,18 @@ +# Names should be added to this file with this pattern: +# +# For individuals: +# Name +# +# For organizations: +# Organization +# +# See python fnmatch module documentation for more information. + +Curoverse, Inc. <*@curoverse.com> +Adam Savitzky +Colin Nolan +David +Guillermo Carrasco +Joshua Randall +President and Fellows of Harvard College <*@harvard.edu> +Thomas Mooney diff --git a/COPYING b/COPYING index 2cba2ad20a..61c31397a0 100644 --- a/COPYING +++ b/COPYING @@ -1,15 +1,19 @@ -Server-side components of Arvados contained in the apps/ and services/ -directories, including the API Server, Workbench, and Crunch, are licensed -under the GNU Affero General Public License version 3 (see agpl-3.0.txt). +Unless indicated otherwise in the header of the file, the files in this +repository are distributed under one of three different licenses: AGPL-3.0, +Apache-2.0 or CC-BY-SA-3.0. -The files and directories under the build/, lib/ and tools/ directories are -licensed under the GNU Affero General Public License version 3 (see -agpl-3.0.txt). +Individual files contain an SPDX tag that indicates the license for the file. +These are the three tags in use: -The Arvados client Software Development Kits contained in the sdk/ directory, -example scripts in the crunch_scripts/ directory, the files and directories -under backports/ and docker/, and code samples in the Aravados documentation -are licensed under the Apache License, Version 2.0 (see LICENSE-2.0.txt). + SPDX-License-Identifier: AGPL-3.0 + SPDX-License-Identifier: Apache-2.0 + SPDX-License-Identifier: CC-BY-SA-3.0 -The Arvados Documentation located in the doc/ directory is licensed under the -Creative Commons Attribution-Share Alike 3.0 United States (see by-sa-3.0.txt). 
+This enables machine processing of license information based on the SPDX +License Identifiers that are available here: http://spdx.org/licenses/ + +The full license text for each license is available in this directory: + + AGPL-3.0: agpl-3.0.txt + Apache-2.0: apache-2.0.txt + CC-BY-SA-3.0: cc-by-sa-3.0.txt diff --git a/LICENSE-2.0.txt b/apache-2.0.txt similarity index 100% rename from LICENSE-2.0.txt rename to apache-2.0.txt diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb index 99065947b8..f8fcf5108f 100644 --- a/apps/workbench/app/controllers/collections_controller.rb +++ b/apps/workbench/app/controllers/collections_controller.rb @@ -115,6 +115,10 @@ class CollectionsController < ApplicationController end def show_file_links + if Rails.configuration.keep_web_url || Rails.configuration.keep_web_download_url + # show_file will redirect to keep-web's directory listing + return show_file + end Thread.current[:reader_tokens] = [params[:reader_token]] return if false.equal?(find_object_by_uuid) render layout: false @@ -283,7 +287,12 @@ class CollectionsController < ApplicationController helper_method :download_link def download_link - collections_url + "/download/#{@object.uuid}/#{@search_sharing.first.api_token}/" + token = @search_sharing.first.api_token + if Rails.configuration.keep_web_url || Rails.configuration.keep_web_download_url + keep_web_url(@object.uuid, nil, {path_token: token}) + else + collections_url + "/download/#{@object.uuid}/#{token}/" + end end def share @@ -444,7 +453,7 @@ class CollectionsController < ApplicationController uri.path += 't=' + opts[:path_token] + '/' end uri.path += '_/' - uri.path += URI.escape(file) + uri.path += URI.escape(file) if file query = Hash[URI.decode_www_form(uri.query || '')] { query_token: 'api_token', diff --git a/apps/workbench/app/controllers/repositories_controller.rb b/apps/workbench/app/controllers/repositories_controller.rb index 2a39a94018..5ca6f22b02 100644 --- a/apps/workbench/app/controllers/repositories_controller.rb +++ b/apps/workbench/app/controllers/repositories_controller.rb @@ -6,7 +6,7 @@ class RepositoriesController < ApplicationController before_filter :set_share_links, if: -> { defined? @object } def index_pane_list - %w(recent help) + %w(repositories help) end def show_pane_list @@ -36,4 +36,72 @@ class RepositoriesController < ApplicationController def show_commit @commit = params[:commit] end + + def all_repos + limit = params[:limit].andand.to_i || 100 + offset = params[:offset].andand.to_i || 0 + @filters = params[:filters] || [] + + if @filters.any? + owner_filter = @filters.select do |attr, op, val| + (attr == 'owner_uuid') + end + end + + if !owner_filter.andand.any? + filters = @filters + [["owner_uuid", "=", current_user.uuid]] + my_repos = Repository.all.order("name ASC").limit(limit).offset(offset).filter(filters).results + else # done fetching all owned repositories + my_repos = [] + end + + if !owner_filter.andand.any? 
# if this is a next-page request, the first page was still fetching "owned" repos + @filters = @filters.reject do |attr, op, val| + (attr == 'owner_uuid') or + (attr == 'name') or + (attr == 'uuid') + end + end + + filters = @filters + [["owner_uuid", "!=", current_user.uuid]] + other_repos = Repository.all.order("name ASC").limit(limit).offset(offset).filter(filters).results + + @objects = (my_repos + other_repos).first(limit) + end + + def find_objects_for_index + return if !params[:partial] + + all_repos + + if @objects.any? + @next_page_filters = next_page_filters('>=') + @next_page_href = url_for(partial: :repositories_rows, + filters: @next_page_filters.to_json) + else + @next_page_href = nil + end + end + + def next_page_href with_params={} + @next_page_href + end + + def next_page_filters nextpage_operator + next_page_filters = @filters.reject do |attr, op, val| + (attr == 'owner_uuid') or + (attr == 'name' and op == nextpage_operator) or + (attr == 'uuid' and op == 'not in') + end + + if @objects.any? + last_obj = @objects.last + next_page_filters += [['name', nextpage_operator, last_obj.name]] + next_page_filters += [['uuid', 'not in', [last_obj.uuid]]] + # if the last object is not owned by the current user, we are done with owned repos and are fetching other repos + next_page_filters += [['owner_uuid', '!=', last_obj.uuid]] if last_obj.owner_uuid != current_user.uuid + end + + next_page_filters + end end diff --git a/apps/workbench/app/controllers/users_controller.rb b/apps/workbench/app/controllers/users_controller.rb index 4a40f03120..2e3ced69a5 100644 --- a/apps/workbench/app/controllers/users_controller.rb +++ b/apps/workbench/app/controllers/users_controller.rb @@ -260,28 +260,6 @@ class UsersController < ApplicationController end end - def repositories - # all repositories accessible by current user - all_repositories = Hash[Repository.all.order('name asc').collect {|repo| [repo.uuid, repo]}] - - @my_repositories = [] # we want them ordered as owned and the rest - @repo_writable = {} - - # owned repos - all_repositories.each do |_, repo| - if repo.owner_uuid == current_user.uuid - @repo_writable[repo.uuid] = 'can_write' - @my_repositories << repo - end - end - - # rest of the repos - handled = @my_repositories.map(&:uuid) - all_repositories.each do |_, repo| - @my_repositories << repo if !repo.uuid.in?(handled) - end - end - def virtual_machines @my_vm_logins = {} Link.where(tail_uuid: @object.uuid, diff --git a/apps/workbench/app/views/collections/_show_tags.html.erb b/apps/workbench/app/views/collections/_show_tags.html.erb index fd734c82dc..afab5266e9 100644 --- a/apps/workbench/app/views/collections/_show_tags.html.erb +++ b/apps/workbench/app/views/collections/_show_tags.html.erb @@ -9,7 +9,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
<% if object.editable? %>

- Edit + Edit

<% end %> diff --git a/apps/workbench/app/views/layouts/body.html.erb b/apps/workbench/app/views/layouts/body.html.erb index 8073615c31..3315027b3d 100644 --- a/apps/workbench/app/views/layouts/body.html.erb +++ b/apps/workbench/app/views/layouts/body.html.erb @@ -66,11 +66,7 @@ SPDX-License-Identifier: AGPL-3.0 %> Virtual machines <% end %> -
  • - <%= link_to repositories_user_path(current_user), role: 'menu-item' do %> - Repositories - <% end %> -
  • +
  • Repositories
  • Current token
  • <%= link_to ssh_keys_user_path(current_user), role: 'menu-item' do %> diff --git a/apps/workbench/app/views/users/_add_repository_modal.html.erb b/apps/workbench/app/views/repositories/_add_repository_modal.html.erb similarity index 100% rename from apps/workbench/app/views/users/_add_repository_modal.html.erb rename to apps/workbench/app/views/repositories/_add_repository_modal.html.erb diff --git a/apps/workbench/app/views/repositories/_show_help.html.erb b/apps/workbench/app/views/repositories/_show_help.html.erb index 7980738b55..5904fb29db 100644 --- a/apps/workbench/app/views/repositories/_show_help.html.erb +++ b/apps/workbench/app/views/repositories/_show_help.html.erb @@ -2,7 +2,13 @@ SPDX-License-Identifier: AGPL-3.0 %> -<% if (example = @objects.select(&:push_url).first) %> +<% + filters = @filters + [["owner_uuid", "=", current_user.uuid]] + example = Repository.all.order("name ASC").filter(filters).limit(1).results.first + example = Repository.all.order("name ASC").limit(1).results.first if !example +%> + +<% if example %>

    Sample git quick start: diff --git a/apps/workbench/app/views/repositories/_show_repositories.html.erb b/apps/workbench/app/views/repositories/_show_repositories.html.erb new file mode 100644 index 0000000000..871ba1d365 --- /dev/null +++ b/apps/workbench/app/views/repositories/_show_repositories.html.erb @@ -0,0 +1,46 @@ +<%# Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: AGPL-3.0 %> + +<%= render partial: "add_repository_modal" %> + +

    +
    +
    +

    + When you are using an Arvados virtual machine, you should clone the https:// URLs. This will authenticate automatically using your API token. +

    +

    + In order to clone git repositories using SSH, <%= link_to ssh_keys_user_path(current_user) do%> add an SSH key to your account<%end%> and clone the git@ URLs. +

    +
    +
    + <%= link_to raw(' Add new repository'), "#", + {class: 'btn btn-xs btn-primary', 'data-toggle' => "modal", + 'data-target' => '#add-repository-modal'} %> +
    +
    + +
    + + + + + + + + + + + + + + + + + + +
    Name URL
    +
    +
    diff --git a/apps/workbench/app/views/repositories/_show_repositories_rows.html.erb b/apps/workbench/app/views/repositories/_show_repositories_rows.html.erb new file mode 100644 index 0000000000..fe88608d78 --- /dev/null +++ b/apps/workbench/app/views/repositories/_show_repositories_rows.html.erb @@ -0,0 +1,23 @@ +<%# Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: AGPL-3.0 %> + +<% @objects.each do |repo| %> + + + <%= render :partial => "show_object_button", :locals => {object: repo, size: 'xs' } %> + + + <%= repo[:name] %> + + + <%= repo.http_fetch_url %>
    + <%= repo.editable? ? repo.push_url : repo.fetch_url %> + + + <% if repo.editable? %> + <%= render partial: 'delete_object_button', locals: {object: repo} %> + <% end %> + + +<% end %> diff --git a/apps/workbench/app/views/users/_repositories.html.erb b/apps/workbench/app/views/users/_repositories.html.erb deleted file mode 100644 index 057b6a2138..0000000000 --- a/apps/workbench/app/views/users/_repositories.html.erb +++ /dev/null @@ -1,71 +0,0 @@ -<%# Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: AGPL-3.0 %> - -
    -
    -
    - <%= link_to raw(' Add new repository'), "#", - {class: 'btn btn-xs btn-primary', 'data-toggle' => "modal", - 'data-target' => '#add-repository-modal'} %> -
    -

    - <%= link_to repositories_user_path(current_user) do%> - Repositories - <%end%> -

    -
    - -
    -

    - When you are using an Arvados virtual machine, you should clone the https:// URLs. This will authenticate automatically using your API token. -

    -

    - In order to clone git repositories using SSH, <%= link_to ssh_keys_user_path(current_user) do%> add an SSH key to your account<%end%> and clone the git@ URLs. -

    - - <% if !@my_repositories.any? %> - You do not seem to have access to any repositories. If you would like to request access, please contact your system admin. - <% else %> - - - - - - - - - - - - - - - - - <% @my_repositories.andand.each do |repo| %> - - - - - - - <% end %> - -
    Name URL
    - <%= render :partial => "show_object_button", :locals => {object: repo, size: 'xs' } %> - - <%= repo[:name] %> - - <%= repo.http_fetch_url %>
    - <%= @repo_writable[repo.uuid] ? repo.push_url : repo.fetch_url %> -
    - <% if repo.editable? %> - <%= link_to(repository_path(id: repo.uuid), method: :delete, class: 'btn btn-sm', data: {confirm: "Really delete '#{repo.name || repo.uuid}'?"}) do %> - - <% end %> - <% end %> -
    - <% end %> -
    -
    diff --git a/apps/workbench/app/views/users/repositories.html.erb b/apps/workbench/app/views/users/repositories.html.erb deleted file mode 100644 index 8a1a3b99d3..0000000000 --- a/apps/workbench/app/views/users/repositories.html.erb +++ /dev/null @@ -1,6 +0,0 @@ -<%# Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: AGPL-3.0 %> - -<%= render :partial => 'repositories' %> -<%= render partial: "add_repository_modal" %> diff --git a/apps/workbench/test/controllers/repositories_controller_test.rb b/apps/workbench/test/controllers/repositories_controller_test.rb index 561723da54..99e7285b3b 100644 --- a/apps/workbench/test/controllers/repositories_controller_test.rb +++ b/apps/workbench/test/controllers/repositories_controller_test.rb @@ -125,4 +125,20 @@ class RepositoriesControllerTest < ActionController::TestCase assert_select 'tr td', 'COPYING' end end + + test "get repositories lists linked as well as owned repositories" do + params = { + partial: :repositories_rows, + format: :json, + } + get :index, params, session_for(:active) + assert_response :success + repos = assigns(:objects) + assert repos + assert_not_empty repos, "my_repositories should not be empty" + repo_uuids = repos.map(&:uuid) + assert_includes repo_uuids, api_fixture('repositories')['repository2']['uuid'] # owned by active + assert_includes repo_uuids, api_fixture('repositories')['repository4']['uuid'] # shared with active + assert_includes repo_uuids, api_fixture('repositories')['arvados']['uuid'] # shared with all_users + end end diff --git a/apps/workbench/test/controllers/users_controller_test.rb b/apps/workbench/test/controllers/users_controller_test.rb index ee64969f76..ce9282ff77 100644 --- a/apps/workbench/test/controllers/users_controller_test.rb +++ b/apps/workbench/test/controllers/users_controller_test.rb @@ -35,29 +35,6 @@ class UsersControllerTest < ActionController::TestCase assert_match /\/users\/welcome/, @response.redirect_url end - test "show repositories with read, write, or manage permission" do - get :repositories, {id: api_fixture("users")['active']['uuid']}, session_for(:active) - assert_response :success - repos = assigns(:my_repositories) - assert repos - assert_not_empty repos, "my_repositories should not be empty" - editables = repos.collect { |r| !!assigns(:repo_writable)[r.uuid] } - assert_includes editables, true, "should have a writable repository" - assert_includes editables, false, "should have a readonly repository" - end - - test "show repositories lists linked as well as owned repositories" do - get :repositories, {id: api_fixture("users")['active']['uuid']}, session_for(:active) - assert_response :success - repos = assigns(:my_repositories) - assert repos - assert_not_empty repos, "my_repositories should not be empty" - repo_uuids = repos.map(&:uuid) - assert_includes repo_uuids, api_fixture('repositories')['repository2']['uuid'] # owned by active - assert_includes repo_uuids, api_fixture('repositories')['repository4']['uuid'] # shared with active - assert_includes repo_uuids, api_fixture('repositories')['arvados']['uuid'] # shared with all_users - end - test "request shell access" do user = api_fixture('users')['spectator'] diff --git a/apps/workbench/test/integration/application_layout_test.rb b/apps/workbench/test/integration/application_layout_test.rb index e777ebe784..7692d8e5dc 100644 --- a/apps/workbench/test/integration/application_layout_test.rb +++ b/apps/workbench/test/integration/application_layout_test.rb @@ -56,7 +56,7 @@ class 
ApplicationLayoutTest < ActionDispatch::IntegrationTest assert_selector "a[href=\"/projects/#{user['uuid']}\"]", text: 'Home project' assert_selector "a[href=\"/users/#{user['uuid']}/virtual_machines\"]", text: 'Virtual machines' - assert_selector "a[href=\"/users/#{user['uuid']}/repositories\"]", text: 'Repositories' + assert_selector "a[href=\"/repositories\"]", text: 'Repositories' assert_selector "a[href=\"/current_token\"]", text: 'Current token' assert_selector "a[href=\"/users/#{user['uuid']}/ssh_keys\"]", text: 'SSH keys' @@ -214,7 +214,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest end [ - ['Repositories', nil, 's0uqq'], + ['Repositories', nil, 'active/crunchdispatchtest'], ['Virtual machines', nil, 'testvm.shell'], ['SSH keys', nil, 'public_key'], ['Links', nil, 'link_class'], diff --git a/apps/workbench/test/integration/collection_upload_test.rb b/apps/workbench/test/integration/collection_upload_test.rb index e54a5c2185..608cd521de 100644 --- a/apps/workbench/test/integration/collection_upload_test.rb +++ b/apps/workbench/test/integration/collection_upload_test.rb @@ -96,11 +96,11 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest test "Report network error" do need_selenium "to make file uploads work" use_token :admin do - # Even if you somehow do port>2^16, surely nx.example.net won't + # Even if port 0 is a thing, surely nx.example.net won't # respond KeepService.where(service_type: 'proxy').first. update_attributes(service_host: 'nx.example.net', - service_port: 99999) + service_port: 0) end visit page_with_token 'active', sandbox_path diff --git a/apps/workbench/test/integration/collections_test.rb b/apps/workbench/test/integration/collections_test.rb index af8e1313f8..8619858dfe 100644 --- a/apps/workbench/test/integration/collections_test.rb +++ b/apps/workbench/test/integration/collections_test.rb @@ -426,8 +426,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest end test "collection tags tab" do - need_selenium - visit page_with_token('active', '/collections/zzzzz-4zz18-bv31uwvy3neko21') click_link 'Tags' @@ -447,8 +445,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest assert_selector 'a', text: 'Cancel' # add two tags - first('.edit-collection-tags').click - first('.glyphicon-plus').click first('.collection-tag-field-key').click first('.collection-tag-field-key').set('key 1') diff --git a/apps/workbench/test/integration/download_test.rb b/apps/workbench/test/integration/download_test.rb index 37faef9c3d..407458b62b 100644 --- a/apps/workbench/test/integration/download_test.rb +++ b/apps/workbench/test/integration/download_test.rb @@ -8,6 +8,8 @@ require 'helpers/download_helper' class DownloadTest < ActionDispatch::IntegrationTest include KeepWebConfig + @@wrote_test_data = false + setup do use_keep_web_config @@ -17,10 +19,13 @@ class DownloadTest < ActionDispatch::IntegrationTest # Keep data isn't populated by fixtures, so we have to write any # data we expect to read. - ['foo', 'w a z', "Hello world\n"].each do |data| - md5 = `echo -n #{data.shellescape} | arv-put --no-progress --raw -` - assert_match /^#{Digest::MD5.hexdigest(data)}/, md5 - assert $?.success?, $? + if !@@wrote_test_data + ['foo', 'w a z', "Hello world\n"].each do |data| + md5 = `echo -n #{data.shellescape} | arv-put --no-progress --raw -` + assert_match /^#{Digest::MD5.hexdigest(data)}/, md5 + assert $?.success?, $? 
+ end + @@wrote_test_data = true end end @@ -29,7 +34,7 @@ class DownloadTest < ActionDispatch::IntegrationTest uuid_or_pdh = api_fixture('collections')['foo_file'][id_type] token = api_fixture('api_client_authorizations')['active_all_collections']['api_token'] visit "/collections/download/#{uuid_or_pdh}/#{token}/" - within "#collection_files" do + within 'ul' do click_link 'foo' end assert_no_selector 'a' diff --git a/apps/workbench/test/integration/user_settings_menu_test.rb b/apps/workbench/test/integration/user_settings_menu_test.rb index 75009f709c..6a0e46e26a 100644 --- a/apps/workbench/test/integration/user_settings_menu_test.rb +++ b/apps/workbench/test/integration/user_settings_menu_test.rb @@ -114,7 +114,7 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest end test "verify repositories for active user" do - visit page_with_token('active',"/users/#{api_fixture('users')['active']['uuid']}/repositories") + visit page_with_token('active',"/repositories") repos = [[api_fixture('repositories')['foo'], true], [api_fixture('repositories')['repository3'], false], @@ -126,9 +126,9 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest assert_text repo['name'] assert_selector 'a', text:'Show' if owned - assert_not_nil first('.fa-trash-o') + assert_not_nil first('.glyphicon-trash') else - assert_nil first('.fa-trash-o') + assert_nil first('.glyphicon-trash') end end end @@ -193,12 +193,12 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest [ ['virtual_machines', nil, 'Host name', 'testvm2.shell'], - ['repositories', 'Add new repository', 'It may take a minute or two before you can clone your new repository.', 'active/foo'], + ['/repositories', 'Add new repository', 'It may take a minute or two before you can clone your new repository.', 'active/foo'], ['/current_token', nil, 'HISTIGNORE=$HISTIGNORE', 'ARVADOS_API_TOKEN=3kg6k6lzmp9kj5'], ['ssh_keys', 'Add new SSH key', 'Click here to learn about SSH keys in Arvados.', 'active'], ].each do |page_name, button_name, look_for, content| test "test user settings menu for page #{page_name}" do - if page_name == '/current_token' + if page_name == '/current_token' || page_name == '/repositories' visit page_with_token('active', page_name) else visit page_with_token('active', "/users/#{api_fixture('users')['active']['uuid']}/#{page_name}") @@ -216,7 +216,7 @@ class UserSettingsMenuTest < ActionDispatch::IntegrationTest [ ['virtual_machines', 'You do not have access to any virtual machines.'], - ['/repositories', api_fixture('repositories')['arvados']['uuid']], + ['/repositories', api_fixture('repositories')['arvados']['name']], ['/current_token', 'HISTIGNORE=$HISTIGNORE'], ['ssh_keys', 'You have not yet set up an SSH public key for use with Arvados.'], ].each do |page_name, look_for| diff --git a/build/check-copyright-notices b/build/check-copyright-notices index cf4e9bf181..5298371bd1 100755 --- a/build/check-copyright-notices +++ b/build/check-copyright-notices @@ -173,7 +173,7 @@ ${cc} ${cc}${cc:+ }SPDX-License-Identifier: CC-BY-SA-3.0${ce}" found=$(head -n20 "$fnm" | egrep -A${grepAfter} -B${grepBefore} 'Copyright.*Arvados' || true) case ${fnm} in - Makefile | build/* | lib/* | tools/* | apps/* | services/*) + Makefile | build/* | lib/* | tools/* | apps/* | services/* | sdk/cli/bin/crunch-job) want=${wantGPL} ;; crunch_scripts/* | backports/* | docker/* | sdk/*) diff --git a/by-sa-3.0.txt b/cc-by-sa-3.0.txt similarity index 100% rename from by-sa-3.0.txt rename to cc-by-sa-3.0.txt diff --git 
a/sdk/cli/bin/arv-run-pipeline-instance b/sdk/cli/bin/arv-run-pipeline-instance index bccfdac0b8..b66e9c0526 100755 --- a/sdk/cli/bin/arv-run-pipeline-instance +++ b/sdk/cli/bin/arv-run-pipeline-instance @@ -260,31 +260,46 @@ class JobCache [] end end + + # create() returns [job, exception]. If both job and exception are + # nil, there was a non-retryable error and the call should not be + # attempted again. def self.create(pipeline, component, job, create_params) @cache ||= {} body = {job: no_nil_values(job)}.merge(no_nil_values(create_params)) - result = $client.execute(:api_method => $arvados.jobs.create, - :body_object => body, - :authenticated => false, - :headers => { - authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] - }) - j = JSON.parse result.body, :symbolize_names => true - if j.is_a? Hash and j[:uuid] + result = nil + begin + result = $client.execute( + :api_method => $arvados.jobs.create, + :body_object => body, + :authenticated => false, + :headers => { + authorization: 'OAuth2 '+$arv.config['ARVADOS_API_TOKEN'] + }) + if result.status == 429 || result.status >= 500 + raise Exception.new("HTTP status #{result.status}") + end + rescue Exception => e + return nil, e + end + j = JSON.parse(result.body, :symbolize_names => true) rescue nil + if result.status == 200 && j.is_a?(Hash) && j[:uuid] @cache[j[:uuid]] = j + return j, nil else - debuglog "create job: #{j[:errors] rescue nil} with attributes #{body}", 0 + errors = j[:errors] rescue [] + debuglog "create job: [#{result.status}] #{errors.inspect} with attributes #{body}", 0 msg = "" - j[:errors].each do |err| + errors.each do |err| msg += "Error creating job for component #{component}: #{err}\n" end msg += "Job submission was: #{body.to_json}" pipeline.log_stderr(msg) - nil + return nil, nil end end @@ -396,7 +411,10 @@ class WhRunPipelineInstance end end if !errors.empty? - abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nErrors:\n#{errors.collect { |c,p,e| "#{c}::#{p} - #{e}\n" }.join ""}" + all_errors = errors.collect do |c,p,e| + "#{c}::#{p} - #{e}\n" + end.join("") + abort "\n#{Time.now} -- pipeline_template #{@template[:uuid]}\nErrors:\n#{all_errors}" end debuglog "options=" + @options.pretty_inspect self @@ -463,7 +481,7 @@ class WhRunPipelineInstance # are fully specified (any output_of script_parameters are resolved # to real value) my_submit_id = "instance #{@instance[:uuid]} rand #{rand(2**64).to_s(36)}" - job = JobCache.create(@instance, cname, { + job, err = JobCache.create(@instance, cname, { :script => c[:script], :script_parameters => Hash[c[:script_parameters].map do |key, spec| [key, spec[:value]] @@ -490,9 +508,11 @@ class WhRunPipelineInstance c[:job] = job c[:run_in_process] = (@options[:run_jobs_here] and job[:submit_id] == my_submit_id) - else + elsif err.nil? debuglog "component #{cname} new job failed", 0 job_creation_failed += 1 + else + debuglog "component #{cname} new job failed, err=#{err}", 0 end end @@ -657,7 +677,7 @@ class WhRunPipelineInstance @instance[:state] = 'Complete' else @instance[:state] = 'Paused' - end + end else if ended == @components.length or failed > 0 @instance[:state] = success ? 'Complete' : 'Failed' diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job index c0e3075c9b..afca52cb73 100755 --- a/sdk/cli/bin/crunch-job +++ b/sdk/cli/bin/crunch-job @@ -1,7 +1,7 @@ #!/usr/bin/env perl # Copyright (C) The Arvados Authors. All rights reserved. 
# -# SPDX-License-Identifier: Apache-2.0 +# SPDX-License-Identifier: AGPL-3.0 # -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*- diff --git a/sdk/cwl/test_with_arvbox.sh b/sdk/cwl/test_with_arvbox.sh index 88860c04eb..236658c1f1 100755 --- a/sdk/cwl/test_with_arvbox.sh +++ b/sdk/cwl/test_with_arvbox.sh @@ -48,6 +48,8 @@ if test -z "$ARVBOX_CONTAINER" ; then fi if test $reset_container = 1 ; then + arvbox stop + docker rm $ARVBOX_CONTAINER arvbox reset -f fi @@ -81,7 +83,7 @@ export ARVADOS_API_TOKEN=\$(cat /var/lib/arvados/superuser_token) if test "$tag" = "latest" ; then arv-keepdocker --pull arvados/jobs $tag else - jobsimg=$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])") + jobsimg=\$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])") arv-keepdocker --pull arvados/jobs $jobsimg docker tag -f arvados/jobs:$jobsimg arvados/jobs:latest arv-keepdocker arvados/jobs latest diff --git a/sdk/cwl/tests/noreuse.cwl b/sdk/cwl/tests/noreuse.cwl index 46771d1101..4c95eb6817 100644 --- a/sdk/cwl/tests/noreuse.cwl +++ b/sdk/cwl/tests/noreuse.cwl @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + cwlVersion: v1.0 class: Workflow $namespaces: diff --git a/sdk/cwl/tests/stdout.cwl b/sdk/cwl/tests/stdout.cwl index c76a6edec4..2100c37ff1 100644 --- a/sdk/cwl/tests/stdout.cwl +++ b/sdk/cwl/tests/stdout.cwl @@ -1,3 +1,7 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: Apache-2.0 + cwlVersion: v1.0 class: CommandLineTool baseCommand: echo diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index 548f4b0948..68f63b1c26 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -14,6 +14,7 @@ import copy import datetime import errno import fcntl +import fnmatch import hashlib import json import logging @@ -160,6 +161,18 @@ run_opts.add_argument('--name', help=""" Save the collection with the specified name. """) +run_opts.add_argument('--exclude', metavar='PATTERN', default=[], + action='append', help=""" +Exclude files and directories whose names match the given glob pattern. When +using a path-like pattern like 'subdir/*.txt', all text files inside the 'subdir' +directory (relative to the provided input dirs) will be excluded. +When using a filename pattern like '*.txt', any text file will be excluded +no matter where it is placed. +For the special case of needing to exclude only files or dirs directly below +the given input directory, you can use a pattern like './exclude_this.gif'. +You can specify multiple patterns by using this argument more than once.
+""") + _group = run_opts.add_mutually_exclusive_group() _group.add_argument('--progress', action='store_true', help=""" @@ -248,6 +261,10 @@ def parse_arguments(arguments): if not args.filename: args.filename = 'stdin' + # Remove possible duplicated patterns + if len(args.exclude) > 0: + args.exclude = list(set(args.exclude)) + return args @@ -376,18 +393,20 @@ class ArvPutUploadJob(object): } def __init__(self, paths, resume=True, use_cache=True, reporter=None, - bytes_expected=None, name=None, owner_uuid=None, + name=None, owner_uuid=None, ensure_unique_name=False, num_retries=None, put_threads=None, replication_desired=None, filename=None, update_time=60.0, update_collection=None, logger=logging.getLogger('arvados.arv_put'), dry_run=False, - follow_links=True): + follow_links=True, exclude_paths=[], exclude_names=None): self.paths = paths self.resume = resume self.use_cache = use_cache self.update = False self.reporter = reporter - self.bytes_expected = bytes_expected + # This will set to 0 before start counting, if no special files are going + # to be read. + self.bytes_expected = None self.bytes_written = 0 self.bytes_skipped = 0 self.name = name @@ -415,6 +434,8 @@ class ArvPutUploadJob(object): self.dry_run = dry_run self._checkpoint_before_quit = True self.follow_links = follow_links + self.exclude_paths = exclude_paths + self.exclude_names = exclude_names if not self.use_cache and self.resume: raise ArvPutArgumentConflict('resume cannot be True when use_cache is False') @@ -426,55 +447,101 @@ class ArvPutUploadJob(object): # Load cached data if any and if needed self._setup_state(update_collection) + # Build the upload file list, excluding requested files and counting the + # bytes expected to be uploaded. + self._build_upload_list() + + def _build_upload_list(self): + """ + Scan the requested paths to count file sizes, excluding files & dirs if requested + and building the upload file list. + """ + # If there aren't special files to be read, reset total bytes count to zero + # to start counting. + if not any([p for p in self.paths + if not (os.path.isfile(p) or os.path.isdir(p))]): + self.bytes_expected = 0 + + for path in self.paths: + # Test for stdin first, in case some file named '-' exist + if path == '-': + if self.dry_run: + raise ArvPutUploadIsPending() + self._write_stdin(self.filename or 'stdin') + elif not os.path.exists(path): + raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path)) + elif os.path.isdir(path): + # Use absolute paths on cache index so CWD doesn't interfere + # with the caching logic. + orig_path = path + path = os.path.abspath(path) + if orig_path[-1:] == os.sep: + # When passing a directory reference with a trailing slash, + # its contents should be uploaded directly to the + # collection's root. + prefixdir = path + else: + # When passing a directory reference with no trailing slash, + # upload the directory to the collection's root. 
+ prefixdir = os.path.dirname(path) + prefixdir += os.sep + for root, dirs, files in os.walk(path, + followlinks=self.follow_links): + root_relpath = os.path.relpath(root, path) + if root_relpath == '.': + root_relpath = '' + # Exclude files/dirs by full path matching pattern + if self.exclude_paths: + dirs[:] = [d for d in dirs + if not any(pathname_match( + os.path.join(root_relpath, d), pat) + for pat in self.exclude_paths)] + files = [f for f in files + if not any(pathname_match( + os.path.join(root_relpath, f), pat) + for pat in self.exclude_paths)] + # Exclude files/dirs by name matching pattern + if self.exclude_names is not None: + dirs[:] = [d for d in dirs + if not self.exclude_names.match(d)] + files = [f for f in files + if not self.exclude_names.match(f)] + # Make os.walk()'s dir traversing order deterministic + dirs.sort() + files.sort() + for f in files: + filepath = os.path.join(root, f) + # Add its size to the total bytes count (if applicable) + if self.follow_links or (not os.path.islink(filepath)): + if self.bytes_expected is not None: + self.bytes_expected += os.path.getsize(filepath) + self._check_file(filepath, + os.path.join(root[len(prefixdir):], f)) + else: + filepath = os.path.abspath(path) + # Add its size to the total bytes count (if applicable) + if self.follow_links or (not os.path.islink(filepath)): + if self.bytes_expected is not None: + self.bytes_expected += os.path.getsize(filepath) + self._check_file(filepath, + self.filename or os.path.basename(path)) + # If dry-run mode is on and we got up to this point, then we should + # notify that there aren't any files to upload. + if self.dry_run: + raise ArvPutUploadNotPending() + # Remove local_collection's files that don't exist locally anymore, so the + # bytes_written count is correct. + for f in self.collection_file_paths(self._local_collection, + path_prefix=""): + if f != 'stdin' and f != self.filename and not f in self._file_paths: + self._local_collection.remove(f) + def start(self, save_collection): """ Start supporting thread & file uploading """ - if not self.dry_run: - self._checkpointer.start() + self._checkpointer.start() try: - for path in self.paths: - # Test for stdin first, in case some file named '-' exist - if path == '-': - if self.dry_run: - raise ArvPutUploadIsPending() - self._write_stdin(self.filename or 'stdin') - elif not os.path.exists(path): - raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path)) - elif os.path.isdir(path): - # Use absolute paths on cache index so CWD doesn't interfere - # with the caching logic. - orig_path = path - path = os.path.abspath(path) - if orig_path[-1:] == os.sep: - # When passing a directory reference with a trailing slash, - # its contents should be uploaded directly to the collection's root. - prefixdir = path - else: - # When passing a directory reference with no trailing slash, - # upload the directory to the collection's root. - prefixdir = os.path.dirname(path) - prefixdir += os.sep - for root, dirs, files in os.walk(path, followlinks=self.follow_links): - # Make os.walk()'s dir traversing order deterministic - dirs.sort() - files.sort() - for f in files: - self._check_file(os.path.join(root, f), - os.path.join(root[len(prefixdir):], f)) - else: - self._check_file(os.path.abspath(path), - self.filename or os.path.basename(path)) - # If dry-mode is on, and got up to this point, then we should notify that - # there aren't any file to upload.
- if self.dry_run: - raise ArvPutUploadNotPending() - # Remove local_collection's files that don't exist locally anymore, so the - # bytes_written count is correct. - for f in self.collection_file_paths(self._local_collection, - path_prefix=""): - if f != 'stdin' and f != self.filename and not f in self._file_paths: - self._local_collection.remove(f) # Update bytes_written from current local collection and # report initial progress. self._update() @@ -660,7 +727,13 @@ class ArvPutUploadJob(object): should_upload = True if should_upload: - self._files_to_upload.append((source, resume_offset, filename)) + try: + self._files_to_upload.append((source, resume_offset, filename)) + except ArvPutUploadIsPending: + # This can happen when running in dry-run mode; close the cache + # file to avoid locking issues. + self._cache_file.close() + raise def _upload_files(self): for source, resume_offset, filename in self._files_to_upload: @@ -839,29 +912,24 @@ class ArvPutUploadJob(object): datablocks = self._datablocks_on_item(self._my_collection()) return datablocks - -def expected_bytes_for(pathlist, follow_links=True): - # Walk the given directory trees and stat files, adding up file sizes, - # so we can display progress as percent - bytesum = 0 - for path in pathlist: - if os.path.isdir(path): - for root, dirs, files in os.walk(path, followlinks=follow_links): - # Sum file sizes - for f in files: - filepath = os.path.join(root, f) - # Ignore symlinked files when requested - if (not follow_links) and os.path.islink(filepath): - continue - bytesum += os.path.getsize(filepath) - elif not os.path.isfile(path): - return None - else: - bytesum += os.path.getsize(path) - return bytesum - _machine_format = "{} {}: {{}} written {{}} total\n".format(sys.argv[0], os.getpid()) + +# Simulate glob.glob() matching behavior without the need to scan the filesystem +# Note: fnmatch() doesn't work correctly when used with pathnames. For example, the +# pattern 'tests/*.py' will match 'tests/run_test.py' and also 'tests/subdir/run_test.py', +# so instead we're using it on every path component. +def pathname_match(pathname, pattern): + name = pathname.split(os.sep) + # Fix patterns like 'some/subdir/' or 'some//subdir' + pat = [x for x in pattern.split(os.sep) if x != '' and x != '.'] + if len(name) != len(pat): + return False + for i in range(len(name)): + if not fnmatch.fnmatch(name[i], pat[i]): + return False + return True + def machine_progress(bytes_written, bytes_expected): return _machine_format.format( bytes_written, -1 if (bytes_expected is None) else bytes_expected) @@ -937,19 +1005,55 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): else: reporter = None + # Set up the exclude regexes from all the --exclude arguments provided + name_patterns = [] + exclude_paths = [] + exclude_names = None + if len(args.exclude) > 0: + # We're supporting 2 kinds of exclusion patterns: + # 1) --exclude '*.jpg' (file/dir name patterns, will only match + # the name, wherever the file is on the tree) + # 2.1) --exclude 'foo/bar' (file/dir path patterns, will match the + # entire path, and should be relative to + # any input dir argument) + # 2.2) --exclude './*.jpg' (Special case for excluding files/dirs + # placed directly underneath the input dir) + for p in args.exclude: + # Only relative path patterns are allowed + if p.startswith(os.sep): + logger.error("Cannot use absolute paths with --exclude") + sys.exit(1) + if os.path.dirname(p): + # We don't support path patterns with '..' + p_parts = p.split(os.sep) + if '..'
in p_parts: + logger.error( + "Cannot use path patterns that include '..'") + sys.exit(1) + # Path search pattern + exclude_paths.append(p) + else: + # Name-only search pattern + name_patterns.append(p) + # For name-only matching, we can combine all patterns into a single + # regexp, for better performance. + exclude_names = re.compile('|'.join( + [fnmatch.translate(p) for p in name_patterns] + )) if len(name_patterns) > 0 else None + # Show the user the patterns to be used, just in case they weren't + # specified inside quotes and got changed by the shell expansion. + logger.info("Exclude patterns: {}".format(args.exclude)) + # If this is used by a human, and there's at least one directory to be # uploaded, the expected bytes calculation can take a moment. if args.progress and any([os.path.isdir(f) for f in args.paths]): logger.info("Calculating upload size, this could take some time...") - bytes_expected = expected_bytes_for(args.paths, follow_links=args.follow_links) - try: writer = ArvPutUploadJob(paths = args.paths, resume = args.resume, use_cache = args.use_cache, filename = args.filename, reporter = reporter, - bytes_expected = bytes_expected, num_retries = args.retries, replication_desired = args.replication, put_threads = args.threads, @@ -959,7 +1063,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): update_collection = args.update_collection, logger=logger, dry_run=args.dry_run, - follow_links=args.follow_links) + follow_links=args.follow_links, + exclude_paths=exclude_paths, + exclude_names=exclude_names) except ResumeCacheConflict: logger.error("\n".join([ "arv-put: Another process is already uploading this data.", @@ -975,6 +1081,10 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): except ArvPutUploadNotPending: # No files pending for upload sys.exit(0) + except PathDoesNotExistError as error: + logger.error("\n".join([ + "arv-put: %s" % str(error)])) + sys.exit(1) # Install our signal handler for each code in CAUGHT_SIGNALS, and save # the originals. @@ -995,16 +1105,6 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr): logger.error("\n".join([ "arv-put: %s" % str(error)])) sys.exit(1) - except ArvPutUploadIsPending: - # Dry run check successful, return proper exit code. - sys.exit(2) - except ArvPutUploadNotPending: - # No files pending for upload - sys.exit(0) - except PathDoesNotExistError as error: - logger.error("\n".join([ - "arv-put: %s" % str(error)])) - sys.exit(1) if args.progress: # Print newline to split stderr from stdout for humans. logger.info("\n") diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py index f571bdbfdd..57efb97c48 100644 --- a/sdk/python/tests/run_test_server.py +++ b/sdk/python/tests/run_test_server.py @@ -497,14 +497,15 @@ def run_keep(blob_signing_key=None, enforce_permissions=False, num_servers=2): 'keep_disk': {'keep_service_uuid': svc['uuid'] } }).execute() - # If keepproxy is running, send SIGHUP to make it discover the new - # keepstore services. - proxypidfile = _pidfile('keepproxy') - if os.path.exists(proxypidfile): - try: - os.kill(int(open(proxypidfile).read()), signal.SIGHUP) - except OSError: - os.remove(proxypidfile) + # If keepproxy and/or keep-web is running, send SIGHUP to make + # them discover the new keepstore services.
+ for svc in ('keepproxy', 'keep-web'): + pidfile = _pidfile(svc) + if os.path.exists(pidfile): + try: + os.kill(int(open(pidfile).read()), signal.SIGHUP) + except OSError: + os.remove(pidfile) def _stop_keep(n): kill_server_pid(_pidfile('keep{}'.format(n))) diff --git a/sdk/python/tests/test_arv_put.py b/sdk/python/tests/test_arv_put.py index 9c325c8083..b8065ef3aa 100644 --- a/sdk/python/tests/test_arv_put.py +++ b/sdk/python/tests/test_arv_put.py @@ -303,9 +303,8 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, def test_passing_nonexistant_path_raise_exception(self): uuid_str = str(uuid.uuid4()) - cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) with self.assertRaises(arv_put.PathDoesNotExistError): - cwriter.start(save_collection=False) + cwriter = arv_put.ArvPutUploadJob(["/this/path/does/not/exist/{}".format(uuid_str)]) def test_writer_works_without_cache(self): cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False) @@ -340,7 +339,8 @@ for expect_count in (None, 8): progression, reporter = self.make_progress_tester() cwriter = arv_put.ArvPutUploadJob([f.name], - reporter=reporter, bytes_expected=expect_count) + reporter=reporter) + cwriter.bytes_expected = expect_count cwriter.start(save_collection=False) cwriter.destroy_cache() self.assertIn((3, expect_count), progression) @@ -496,23 +496,20 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers, self.assertGreater(writer.bytes_written, 0) self.assertLess(writer.bytes_written, os.path.getsize(self.large_file_name)) - # Retry the upload using dry_run to check if there is a pending upload - writer2 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadIsPending): - writer2.start(save_collection=False) + # Retry the upload using dry_run to check if there is a pending upload + writer2 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Complete the pending upload writer3 = arv_put.ArvPutUploadJob([self.large_file_name], replication_desired=1) writer3.start(save_collection=False) - # Confirm there's no pending upload with dry_run=True - writer4 = arv_put.ArvPutUploadJob([self.large_file_name], - replication_desired=1, - dry_run=True) with self.assertRaises(arv_put.ArvPutUploadNotPending): - writer4.start(save_collection=False) - writer4.destroy_cache() + # Confirm there's no pending upload with dry_run=True + writer4 = arv_put.ArvPutUploadJob([self.large_file_name], + replication_desired=1, + dry_run=True) # Test obvious cases with self.assertRaises(arv_put.ArvPutUploadIsPending): arv_put.ArvPutUploadJob([self.large_file_name], @@ -531,21 +528,27 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase): TEST_SIZE = os.path.getsize(__file__) def test_expected_bytes_for_file(self): + writer = arv_put.ArvPutUploadJob([__file__]) self.assertEqual(self.TEST_SIZE, - arv_put.expected_bytes_for([__file__])) + writer.bytes_expected) def test_expected_bytes_for_tree(self): tree = self.make_tmpdir() shutil.copyfile(__file__, os.path.join(tree, 'one')) shutil.copyfile(__file__, os.path.join(tree, 'two')) + + writer = arv_put.ArvPutUploadJob([tree]) self.assertEqual(self.TEST_SIZE * 2, - arv_put.expected_bytes_for([tree])) + writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([tree, __file__]) self.assertEqual(self.TEST_SIZE * 3, - arv_put.expected_bytes_for([tree, __file__])) +
writer.bytes_expected) def test_expected_bytes_for_device(self): - self.assertIsNone(arv_put.expected_bytes_for(['/dev/null'])) - self.assertIsNone(arv_put.expected_bytes_for([__file__, '/dev/null'])) + writer = arv_put.ArvPutUploadJob(['/dev/null']) + self.assertIsNone(writer.bytes_expected) + writer = arv_put.ArvPutUploadJob([__file__, '/dev/null']) + self.assertIsNone(writer.bytes_expected) class ArvadosPutReportTest(ArvadosBaseTestCase): @@ -673,6 +676,13 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, self.call_main_with_args, ['--project-uuid', self.Z_UUID, '--stream']) + def test_error_when_excluding_absolute_path(self): + tmpdir = self.make_tmpdir() + self.assertRaises(SystemExit, + self.call_main_with_args, + ['--exclude', '/some/absolute/path/*', + tmpdir]) + def test_api_error_handling(self): coll_save_mock = mock.Mock(name='arv.collection.Collection().save_new()') coll_save_mock.side_effect = arvados.errors.ApiError( @@ -915,6 +925,50 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers, '--project-uuid', self.PROJECT_UUID]) self.assertEqual(link_name, collection['name']) + def test_exclude_filename_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', '*2.txt', + '--exclude', 'file3.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Neither file2.txt nor file3.txt should have been uploaded + self.assertRegex(c['manifest_text'], r'^.*:file1.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file2.txt') + self.assertNotRegex(c['manifest_text'], r'^.*:file3.txt') + + def test_exclude_filepath_pattern(self): + tmpdir = self.make_tmpdir() + tmpsubdir = os.path.join(tmpdir, 'subdir') + os.mkdir(tmpsubdir) + for fname in ['file1', 'file2', 'file3']: + with open(os.path.join(tmpdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + with open(os.path.join(tmpsubdir, "%s.txt" % fname), 'w') as f: + f.write("This is %s" % fname) + col = self.run_and_find_collection("", ['--no-progress', + '--exclude', 'subdir/*2.txt', + '--exclude', './file1.*', + tmpdir]) + self.assertNotEqual(None, col['uuid']) + c = arv_put.api_client.collections().get(uuid=col['uuid']).execute() + # Only tmpdir/file1.txt & tmpdir/subdir/file2.txt should have been excluded + self.assertNotRegex(c['manifest_text'], + r'^\./%s.*:file1.txt' % os.path.basename(tmpdir)) + self.assertNotRegex(c['manifest_text'], + r'^\./%s/subdir.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], + r'^\./%s.*:file2.txt' % os.path.basename(tmpdir)) + self.assertRegex(c['manifest_text'], r'^.*:file3.txt') + if __name__ == '__main__': unittest.main() diff --git a/services/api/Rakefile b/services/api/Rakefile index 9d3d8f314c..fad803cb94 100644 --- a/services/api/Rakefile +++ b/services/api/Rakefile @@ -37,10 +37,17 @@ namespace :db do require 'tempfile' origfnm = File.expand_path('../db/structure.sql', __FILE__) tmpfnm = Tempfile.new 'structure.sql', File.expand_path('..', origfnm) + copyright_done = false begin tmpfile = File.new tmpfnm, 'w' origfile = File.new origfnm origfile.each_line do |line| + if
!/Copyright .* Arvados/.match(line) + tmpfile.write "-- Copyright (C) The Arvados Authors. All rights reserved.\n--\n-- SPDX-License-Identifier: AGPL-3.0\n\n" + end + copyright_done = true + end if /^SET lock_timeout = 0;/ =~ line # Avoid edit wars between versions that do/don't write this line. next diff --git a/services/api/db/migrate/20170628185847_jobs_yaml_to_json.rb b/services/api/db/migrate/20170628185847_jobs_yaml_to_json.rb new file mode 100644 index 0000000000..2c90c9a04b --- /dev/null +++ b/services/api/db/migrate/20170628185847_jobs_yaml_to_json.rb @@ -0,0 +1,21 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +require 'migrate_yaml_to_json' + +class JobsYamlToJson < ActiveRecord::Migration + def up + [ + 'components', + 'script_parameters', + 'runtime_constraints', + 'tasks_summary', + ].each do |column| + MigrateYAMLToJSON.migrate("jobs", column) + end + end + + def down + end +end diff --git a/services/api/db/structure.sql b/services/api/db/structure.sql index b38cba8109..7e6bb1dca4 100644 --- a/services/api/db/structure.sql +++ b/services/api/db/structure.sql @@ -2789,3 +2789,5 @@ INSERT INTO schema_migrations (version) VALUES ('20170419173712'); INSERT INTO schema_migrations (version) VALUES ('20170419175801'); +INSERT INTO schema_migrations (version) VALUES ('20170628185847'); + diff --git a/services/api/lib/migrate_yaml_to_json.rb b/services/api/lib/migrate_yaml_to_json.rb new file mode 100644 index 0000000000..1db7ed0113 --- /dev/null +++ b/services/api/lib/migrate_yaml_to_json.rb @@ -0,0 +1,32 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +module MigrateYAMLToJSON + def self.migrate(table, column) + conn = ActiveRecord::Base.connection + n = conn.update( + "UPDATE #{table} SET #{column}=$1 WHERE #{column}=$2", + "#{table}.#{column} convert YAML to JSON", + [[nil, "{}"], [nil, "--- {}\n"]]) + Rails.logger.info("#{table}.#{column}: #{n} rows updated using empty hash") + finished = false + while !finished + n = 0 + conn.exec_query( + "SELECT id, #{column} FROM #{table} WHERE #{column} LIKE $1 LIMIT 100", + "#{table}.#{column} check for YAML", + [[nil, '---%']], + ).rows.map do |id, yaml| + n += 1 + json = SafeJSON.dump(YAML.load(yaml)) + conn.exec_query( + "UPDATE #{table} SET #{column}=$1 WHERE id=$2 AND #{column}=$3", + "#{table}.#{column} convert YAML to JSON", + [[nil, json], [nil, id], [nil, yaml]]) + end + Rails.logger.info("#{table}.#{column}: #{n} rows updated") + finished = (n == 0) + end + end +end diff --git a/services/crunch-run/logging.go b/services/crunch-run/logging.go index 84f2f2fd63..0083f0999c 100644 --- a/services/crunch-run/logging.go +++ b/services/crunch-run/logging.go @@ -373,7 +373,7 @@ func (arvlog *ArvLogWriter) rateLimit(line []byte, now time.Time) (bool, []byte) } } -// load the rate limit discovery config paramters +// load the rate limit discovery config parameters func loadLogThrottleParams(clnt IArvadosClient) { param, err := clnt.Discovery("crunchLimitLogBytesPerJob") if err == nil { diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py index 11d26adfb8..30ae6b40e0 100644 --- a/services/fuse/arvados_fuse/fusedir.py +++ b/services/fuse/arvados_fuse/fusedir.py @@ -679,6 +679,7 @@ class TagsDirectory(Directory): self.num_retries = num_retries self._poll = True self._poll_time = poll_time + self._extra = set() def want_event_subscribe(self): return True @@ -687,15 +688,41 @@ class 
TagsDirectory(Directory): def update(self): with llfuse.lock_released: tags = self.api.links().list( - filters=[['link_class', '=', 'tag']], - select=['name'], distinct=True + filters=[['link_class', '=', 'tag'], ["name", "!=", ""]], + select=['name'], distinct=True, limit=1000 ).execute(num_retries=self.num_retries) if "items" in tags: - self.merge(tags['items'], + self.merge(tags['items']+[{"name": n} for n in self._extra], lambda i: i['name'], lambda a, i: a.tag == i['name'], lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time)) + @use_counter + @check_update + def __getitem__(self, item): + if super(TagsDirectory, self).__contains__(item): + return super(TagsDirectory, self).__getitem__(item) + with llfuse.lock_released: + tags = self.api.links().list( + filters=[['link_class', '=', 'tag'], ['name', '=', item]], limit=1 + ).execute(num_retries=self.num_retries) + if tags["items"]: + self._extra.add(item) + self.update() + return super(TagsDirectory, self).__getitem__(item) + + @use_counter + @check_update + def __contains__(self, k): + if super(TagsDirectory, self).__contains__(k): + return True + try: + self[k] + return True + except KeyError: + pass + return False + class TagDirectory(Directory): """A special directory that contains as subdirectories all collections visible diff --git a/services/ws/session_v0_test.go b/services/ws/session_v0_test.go index 0ae8ceb1c4..1213be5d14 100644 --- a/services/ws/session_v0_test.go +++ b/services/ws/session_v0_test.go @@ -83,28 +83,31 @@ func (s *v0Suite) TestLastLogID(c *check.C) { }), check.IsNil) s.expectStatus(c, r, 200) + avoidRace := make(chan struct{}, cap(uuidChan)) go func() { + // When last_log_id is given, although v0session sends + // old events in order, and sends new events in order, + // it doesn't necessarily finish sending all old + // events before sending any new events. To avoid + // hitting this bug in the test, we wait for the old + // events to arrive before emitting any new events. 
+ <-avoidRace s.emitEvents(uuidChan) close(uuidChan) }() - done := make(chan bool) go func() { for uuid := range uuidChan { for _, etype := range []string{"create", "blip", "update"} { lg := s.expectLog(c, r) - c.Check(lg.ObjectUUID, check.Equals, uuid) + for lg.ObjectUUID != uuid { + lg = s.expectLog(c, r) + } c.Check(lg.EventType, check.Equals, etype) } + avoidRace <- struct{}{} } - close(done) }() - - select { - case <-time.After(10 * time.Second): - c.Fatal("timeout") - case <-done: - } } func (s *v0Suite) TestPermission(c *check.C) { @@ -117,16 +120,21 @@ func (s *v0Suite) TestPermission(c *check.C) { }), check.IsNil) s.expectStatus(c, r, 200) - uuidChan := make(chan string, 1) + uuidChan := make(chan string, 2) go func() { s.token = arvadostest.AdminToken - s.emitEvents(nil) + s.emitEvents(uuidChan) s.token = arvadostest.ActiveToken s.emitEvents(uuidChan) }() + wrongUUID := <-uuidChan + rightUUID := <-uuidChan lg := s.expectLog(c, r) - c.Check(lg.ObjectUUID, check.Equals, <-uuidChan) + for lg.ObjectUUID != rightUUID { + c.Check(lg.ObjectUUID, check.Not(check.Equals), wrongUUID) + lg = s.expectLog(c, r) + } } func (s *v0Suite) TestSendBadJSON(c *check.C) { @@ -170,7 +178,9 @@ func (s *v0Suite) TestSubscribe(c *check.C) { for _, etype := range []string{"create", "blip", "update"} { lg := s.expectLog(c, r) - c.Check(lg.ObjectUUID, check.Equals, uuid) + for lg.ObjectUUID != uuid { + lg = s.expectLog(c, r) + } c.Check(lg.EventType, check.Equals, etype) } } @@ -228,8 +238,17 @@ func (s *v0Suite) expectStatus(c *check.C, r *json.Decoder, status int) { func (s *v0Suite) expectLog(c *check.C, r *json.Decoder) *arvados.Log { lg := &arvados.Log{} - c.Check(r.Decode(lg), check.IsNil) - return lg + ok := make(chan struct{}) + go func() { + c.Check(r.Decode(lg), check.IsNil) + close(ok) + }() + select { + case <-time.After(10 * time.Second): + panic("timed out") + case <-ok: + return lg + } } func (s *v0Suite) testClient() (*server, *websocket.Conn, *json.Decoder, *json.Encoder) {
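
A note on the licensing change above: COPYING now says that every file carries one of three SPDX tags. A quick way to sanity-check a working tree for files missing a tag is sketched below (a hypothetical helper, not part of this commit; the real checker is build/check-copyright-notices, which also honors the .licenseignore list updated at the top of this patch):

    import os

    LICENSE_TAGS = ('AGPL-3.0', 'Apache-2.0', 'CC-BY-SA-3.0')

    def spdx_tag(path, max_lines=20):
        # Return the SPDX identifier declared near the top of path, or None.
        try:
            with open(path) as f:
                for lineno, line in enumerate(f):
                    if lineno >= max_lines:
                        break
                    if 'SPDX-License-Identifier:' in line:
                        rest = line.split('SPDX-License-Identifier:', 1)[1].split()
                        return rest[0] if rest else None
        except (IOError, UnicodeDecodeError):
            pass
        return None

    for root, dirs, files in os.walk('.'):
        dirs[:] = [d for d in dirs if d != '.git']
        for name in files:
            path = os.path.join(root, name)
            if spdx_tag(path) not in LICENSE_TAGS:
                print('missing or unknown SPDX tag: %s' % path)

Taking only the first whitespace-separated token keeps ERB headers like "SPDX-License-Identifier: AGPL-3.0 %>" from being misread.
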
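On the arv-put changes: pathname_match(), added in put.py above, is what makes path-style --exclude patterns behave like glob.glob() without scanning the filesystem. Plain fnmatch() on a whole path would let 'tests/*.py' match 'tests/subdir/run_test.py' as well, so the pattern is instead applied one path component at a time. A standalone illustration (the function is copied from the diff, lightly condensed):

    import fnmatch
    import os

    def pathname_match(pathname, pattern):
        name = pathname.split(os.sep)
        # Drop empty components and '.' so 'some//subdir' and './x' are handled
        pat = [x for x in pattern.split(os.sep) if x != '' and x != '.']
        if len(name) != len(pat):
            return False
        # Apply fnmatch() component by component, not to the whole path
        return all(fnmatch.fnmatch(n, p) for n, p in zip(name, pat))

    assert pathname_match('tests/run_test.py', 'tests/*.py')
    assert not pathname_match('tests/subdir/run_test.py', 'tests/*.py')
    assert pathname_match('file1.txt', './file1.*')  # the top-level-only form
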
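Name-only patterns take the other branch in main(): they are combined into a single regexp with fnmatch.translate(), so each directory entry is tested with one match() call instead of one fnmatch() call per pattern. Mirroring the integration-test fixtures above, where roughly `arv-put --no-progress --exclude '*2.txt' --exclude 'file3.*' tmpdir` skips file2.txt and file3.txt at every level:

    import fnmatch
    import re

    name_patterns = ['*2.txt', 'file3.*']
    exclude_names = re.compile('|'.join(
        fnmatch.translate(p) for p in name_patterns))

    assert exclude_names.match('file2.txt')
    assert exclude_names.match('file3.txt')
    assert not exclude_names.match('file1.txt')
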
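Finally, the TagsDirectory change above caps the directory listing at 1000 tags, but the new __getitem__/__contains__ overrides mean a lookup of an unlisted tag still works: a miss triggers a targeted links().list(..., limit=1) query, and a hit is remembered in _extra so later update() calls keep the entry visible. A schematic of that pattern, detached from FUSE (FakeAPI is hypothetical, standing in for the links API):

    class FakeAPI(object):
        # Hypothetical stand-in for the tag-listing API.
        def __init__(self, tags, page_limit):
            self.tags, self.page_limit = tags, page_limit

        def list_names(self):
            return self.tags[:self.page_limit]  # truncated listing

        def exists(self, name):
            return name in self.tags  # targeted lookup, like limit=1

    class TagsView(object):
        def __init__(self, api):
            self.api = api
            self._entries = {}
            self._extra = set()  # names found only via targeted lookups
            self.update()

        def update(self):
            for name in set(self.api.list_names()) | self._extra:
                self._entries.setdefault(name, name)

        def __getitem__(self, name):
            if name not in self._entries and self.api.exists(name):
                self._extra.add(name)  # stays visible on future updates
                self.update()
            return self._entries[name]  # KeyError if truly absent

        def __contains__(self, name):
            try:
                self[name]
                return True
            except KeyError:
                return False

    api = FakeAPI(['tag%d' % i for i in range(1500)], page_limit=1000)
    view = TagsView(api)
    assert 'tag1400' in view  # beyond the page limit, found on demand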