From: Tom Clegg Date: Tue, 25 Aug 2020 20:41:21 +0000 (-0400) Subject: 16314: Merge branch 'master' X-Git-Tag: 2.1.0~107^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/505c8fa50631201e289cc55230d46fdf52fa2055?hp=b4091adb7ac1a85de6ae1f18895e9d8f9da5d441 16314: Merge branch 'master' Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/apps/workbench/Gemfile b/apps/workbench/Gemfile index 24bfba383f..d5b416b539 100644 --- a/apps/workbench/Gemfile +++ b/apps/workbench/Gemfile @@ -4,7 +4,7 @@ source 'https://rubygems.org' -gem 'rails', '~> 5.0.0' +gem 'rails', '~> 5.2.0' gem 'arvados', git: 'https://github.com/arvados/arvados.git', glob: 'sdk/ruby/arvados.gemspec' gem 'activerecord-nulldb-adapter', git: 'https://github.com/arvados/nulldb' @@ -14,6 +14,13 @@ gem 'sass' gem 'mime-types' gem 'responders', '~> 2.0' +# Pin sprockets to < 4.0 to avoid issues when upgrading rails to 5.2 +# See: https://github.com/rails/sprockets-rails/issues/443 +gem 'sprockets', '~> 3.0' + +# Fast app boot times +gem 'bootsnap', require: false + # Note: keeping this out of the "group :assets" section "may" allow us # to use Coffescript for UJS responses. It also prevents a # warning/problem when running tests: "WARN: tilt autoloading @@ -31,8 +38,14 @@ group :assets do gem 'therubyracer', :platforms => :ruby end -group :development do +group :development, :test, :performance do gem 'byebug' + # Pinning launchy because 2.5 requires ruby >= 2.4, which arvbox currently + # doesn't have because of SSO. 
+ gem 'launchy', '~> 2.4.0' +end + +group :development do gem 'ruby-debug-passenger' gem 'rack-mini-profiler', require: false gem 'flamegraph', require: false @@ -48,7 +61,6 @@ group :test, :diagnostics, :performance do end group :test, :performance do - gem 'byebug' gem 'rails-perftest' gem 'ruby-prof' gem 'rvm-capistrano' @@ -70,12 +82,6 @@ gem 'angularjs-rails', '~> 1.3.8' gem 'less' gem 'less-rails' - -# Wiselinks hasn't been updated for many years and it's using deprecated methods -# Use our own Wiselinks fork until this PR is accepted: -# https://github.com/igor-alexandrov/wiselinks/pull/116 -# gem 'wiselinks', git: 'https://github.com/arvados/wiselinks.git', branch: 'rails-5.1-compatibility' - gem 'sshkey' # To use ActiveModel has_secure_password diff --git a/apps/workbench/Gemfile.lock b/apps/workbench/Gemfile.lock index cb4e7ab9e3..e19172cb2e 100644 --- a/apps/workbench/Gemfile.lock +++ b/apps/workbench/Gemfile.lock @@ -30,39 +30,43 @@ GEM remote: https://rubygems.org/ specs: RedCloth (4.3.2) - actioncable (5.0.7.2) - actionpack (= 5.0.7.2) - nio4r (>= 1.2, < 3.0) - websocket-driver (~> 0.6.1) - actionmailer (5.0.7.2) - actionpack (= 5.0.7.2) - actionview (= 5.0.7.2) - activejob (= 5.0.7.2) + actioncable (5.2.4.3) + actionpack (= 5.2.4.3) + nio4r (~> 2.0) + websocket-driver (>= 0.6.1) + actionmailer (5.2.4.3) + actionpack (= 5.2.4.3) + actionview (= 5.2.4.3) + activejob (= 5.2.4.3) mail (~> 2.5, >= 2.5.4) rails-dom-testing (~> 2.0) - actionpack (5.0.7.2) - actionview (= 5.0.7.2) - activesupport (= 5.0.7.2) - rack (~> 2.0) - rack-test (~> 0.6.3) + actionpack (5.2.4.3) + actionview (= 5.2.4.3) + activesupport (= 5.2.4.3) + rack (~> 2.0, >= 2.0.8) + rack-test (>= 0.6.3) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.2) - actionview (5.0.7.2) - activesupport (= 5.0.7.2) + actionview (5.2.4.3) + activesupport (= 5.2.4.3) builder (~> 3.1) - erubis (~> 2.7.0) + erubi (~> 1.4) rails-dom-testing (~> 2.0) rails-html-sanitizer (~> 1.0, >= 1.0.3) - 
activejob (5.0.7.2) - activesupport (= 5.0.7.2) + activejob (5.2.4.3) + activesupport (= 5.2.4.3) globalid (>= 0.3.6) - activemodel (5.0.7.2) - activesupport (= 5.0.7.2) - activerecord (5.0.7.2) - activemodel (= 5.0.7.2) - activesupport (= 5.0.7.2) - arel (~> 7.0) - activesupport (5.0.7.2) + activemodel (5.2.4.3) + activesupport (= 5.2.4.3) + activerecord (5.2.4.3) + activemodel (= 5.2.4.3) + activesupport (= 5.2.4.3) + arel (>= 9.0) + activestorage (5.2.4.3) + actionpack (= 5.2.4.3) + activerecord (= 5.2.4.3) + marcel (~> 0.3.1) + activesupport (5.2.4.3) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (>= 0.7, < 2) minitest (~> 5.1) @@ -71,9 +75,9 @@ GEM public_suffix (>= 2.0.2, < 5.0) andand (1.3.3) angularjs-rails (1.3.15) - arel (7.1.4) - arvados-google-api-client (0.8.7.3) - activesupport (>= 3.2, < 5.1) + arel (9.0.0) + arvados-google-api-client (0.8.7.4) + activesupport (>= 3.2, < 5.3) addressable (~> 2.3) autoparse (~> 0.3) extlib (~> 0.9) @@ -89,6 +93,8 @@ GEM multi_json (>= 1.0.0) autoprefixer-rails (9.5.1.1) execjs + bootsnap (1.4.7) + msgpack (~> 1.0) bootstrap-sass (3.4.1) autoprefixer-rails (>= 5.2.1) sassc (>= 2.0.0) @@ -96,7 +102,7 @@ GEM railties (>= 3.1) bootstrap-x-editable-rails (1.5.1.1) railties (>= 3.0) - builder (3.2.3) + builder (3.2.4) byebug (11.0.1) capistrano (2.15.9) highline @@ -121,11 +127,11 @@ GEM execjs coffee-script-source (1.12.2) commonjs (0.2.7) - concurrent-ruby (1.1.5) - crass (1.0.5) + concurrent-ruby (1.1.6) + crass (1.0.6) deep_merge (1.2.1) docile (1.3.1) - erubis (2.7.0) + erubi (1.9.0) execjs (2.7.0) extlib (0.9.16) faraday (0.15.4) @@ -167,25 +173,29 @@ GEM railties (>= 4) request_store (~> 1.0) logstash-event (1.2.02) - loofah (2.3.1) + loofah (2.6.0) crass (~> 1.0.2) nokogiri (>= 1.5.9) mail (2.7.1) mini_mime (>= 0.1.1) + marcel (0.3.3) + mimemagic (~> 0.3.2) memoist (0.16.2) metaclass (0.0.4) - method_source (0.9.2) + method_source (1.0.0) mime-types (3.2.2) mime-types-data (~> 3.2015) mime-types-data (3.2019.0331) - 
mini_mime (1.0.1) + mimemagic (0.3.5) + mini_mime (1.0.2) mini_portile2 (2.4.0) minitest (5.10.3) mocha (1.8.0) metaclass (~> 0.0.1) morrisjs-rails (0.5.1.2) railties (> 3.1, < 6) - multi_json (1.14.1) + msgpack (1.3.3) + multi_json (1.15.0) multipart-post (2.1.1) net-scp (2.0.0) net-ssh (>= 2.6.5, < 6.0.0) @@ -194,13 +204,13 @@ GEM net-ssh (5.2.0) net-ssh-gateway (2.0.0) net-ssh (>= 4.0.0) - nio4r (2.3.1) - nokogiri (1.10.8) + nio4r (2.5.2) + nokogiri (1.10.10) mini_portile2 (~> 2.4.0) npm-rails (0.2.1) rails (>= 3.2) oj (3.7.12) - os (1.0.1) + os (1.1.1) passenger (6.0.2) rack rake (>= 0.8.1) @@ -213,23 +223,24 @@ GEM cliver (~> 0.3.1) multi_json (~> 1.0) websocket-driver (>= 0.2.0) - public_suffix (4.0.3) + public_suffix (4.0.5) rack (2.2.3) rack-mini-profiler (1.0.2) rack (>= 1.2.0) - rack-test (0.6.3) - rack (>= 1.0) - rails (5.0.7.2) - actioncable (= 5.0.7.2) - actionmailer (= 5.0.7.2) - actionpack (= 5.0.7.2) - actionview (= 5.0.7.2) - activejob (= 5.0.7.2) - activemodel (= 5.0.7.2) - activerecord (= 5.0.7.2) - activesupport (= 5.0.7.2) + rack-test (1.1.0) + rack (>= 1.0, < 3) + rails (5.2.4.3) + actioncable (= 5.2.4.3) + actionmailer (= 5.2.4.3) + actionpack (= 5.2.4.3) + actionview (= 5.2.4.3) + activejob (= 5.2.4.3) + activemodel (= 5.2.4.3) + activerecord (= 5.2.4.3) + activestorage (= 5.2.4.3) + activesupport (= 5.2.4.3) bundler (>= 1.3.0) - railties (= 5.0.7.2) + railties (= 5.2.4.3) sprockets-rails (>= 2.0.0) rails-controller-testing (1.0.4) actionpack (>= 5.0.1.x) @@ -238,15 +249,15 @@ GEM rails-dom-testing (2.0.3) activesupport (>= 4.2.0) nokogiri (>= 1.6) - rails-html-sanitizer (1.0.4) - loofah (~> 2.2, >= 2.2.2) + rails-html-sanitizer (1.3.0) + loofah (~> 2.3) rails-perftest (0.0.7) - railties (5.0.7.2) - actionpack (= 5.0.7.2) - activesupport (= 5.0.7.2) + railties (5.2.4.3) + actionpack (= 5.2.4.3) + activesupport (= 5.2.4.3) method_source rake (>= 0.8.7) - thor (>= 0.18.1, < 2.0) + thor (>= 0.19.0, < 2.0) rake (13.0.1) raphael-rails (2.1.2) 
rb-fsevent (0.10.3) @@ -305,15 +316,15 @@ GEM therubyracer (0.12.3) libv8 (~> 3.16.14.15) ref - thor (0.20.3) + thor (1.0.1) thread_safe (0.3.6) tilt (2.0.9) - tzinfo (1.2.6) + tzinfo (1.2.7) thread_safe (~> 0.1) uglifier (2.7.2) execjs (>= 0.3.0) json (>= 1.8.0) - websocket-driver (0.6.5) + websocket-driver (0.7.3) websocket-extensions (>= 0.1.0) websocket-extensions (0.1.5) xpath (2.1.0) @@ -328,6 +339,7 @@ DEPENDENCIES andand angularjs-rails (~> 1.3.8) arvados! + bootsnap bootstrap-sass (~> 3.4.1) bootstrap-tab-history-rails bootstrap-x-editable-rails @@ -339,6 +351,7 @@ DEPENDENCIES headless (~> 1.0.2) httpclient (~> 2.5) jquery-rails + launchy (~> 2.4.0) less less-rails lograge @@ -354,7 +367,7 @@ DEPENDENCIES piwik_analytics poltergeist (~> 1.5.1) rack-mini-profiler - rails (~> 5.0.0) + rails (~> 5.2.0) rails-controller-testing rails-perftest raphael-rails @@ -369,10 +382,11 @@ DEPENDENCIES signet (< 0.12) simplecov (~> 0.7) simplecov-rcov + sprockets (~> 3.0) sshkey themes_for_rails! therubyracer uglifier (~> 2.0) BUNDLED WITH - 1.16.6 + 1.17.3 diff --git a/apps/workbench/app/controllers/application_controller.rb b/apps/workbench/app/controllers/application_controller.rb index 8d6f897bb6..77ec68bdb0 100644 --- a/apps/workbench/app/controllers/application_controller.rb +++ b/apps/workbench/app/controllers/application_controller.rb @@ -29,7 +29,6 @@ class ApplicationController < ActionController::Base begin rescue_from(ActiveRecord::RecordNotFound, ActionController::RoutingError, - ActionController::UnknownController, AbstractController::ActionNotFound, with: :render_not_found) rescue_from(Exception, diff --git a/apps/workbench/app/models/application_record.rb b/apps/workbench/app/models/application_record.rb deleted file mode 100644 index 759034da66..0000000000 --- a/apps/workbench/app/models/application_record.rb +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) The Arvados Authors. All rights reserved. 
-# -# SPDX-License-Identifier: AGPL-3.0 - -class ApplicationRecord < ActiveRecord::Base - self.abstract_class = true -end \ No newline at end of file diff --git a/apps/workbench/app/models/arvados_base.rb b/apps/workbench/app/models/arvados_base.rb index b9162c2aec..c5e1a4ed22 100644 --- a/apps/workbench/app/models/arvados_base.rb +++ b/apps/workbench/app/models/arvados_base.rb @@ -106,6 +106,12 @@ class ArvadosBase end end + # The ActiveModel::Dirty API was changed on Rails 5.2 + # See: https://github.com/rails/rails/commit/c3675f50d2e59b7fc173d7b332860c4b1a24a726#diff-aaddd42c7feb0834b1b5c66af69814d3 + def mutations_from_database + @mutations_from_database ||= ActiveModel::NullMutationTracker.instance + end + def self.columns @discovered_columns = [] if !defined?(@discovered_columns) return @discovered_columns if @discovered_columns.andand.any? diff --git a/apps/workbench/bin/bundle b/apps/workbench/bin/bundle index 9447ba8612..cb10307acd 100755 --- a/apps/workbench/bin/bundle +++ b/apps/workbench/bin/bundle @@ -3,5 +3,5 @@ # # SPDX-License-Identifier: AGPL-3.0 -ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__) +ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__) load Gem.bin_path('bundler', 'bundle') diff --git a/apps/workbench/bin/setup b/apps/workbench/bin/setup index 50c3fa0548..7aed0fb282 100755 --- a/apps/workbench/bin/setup +++ b/apps/workbench/bin/setup @@ -3,12 +3,11 @@ # # SPDX-License-Identifier: AGPL-3.0 -require 'pathname' require 'fileutils' include FileUtils # path to your application root. -APP_ROOT = Pathname.new File.expand_path('../../', __FILE__) +APP_ROOT = File.expand_path('..', __dir__) def system!(*args) system(*args) || abort("\n== Command #{args} failed ==") @@ -22,6 +21,9 @@ chdir APP_ROOT do system! 
'gem install bundler --conservative' system('bundle check') || system!('bundle install') + # Install JavaScript dependencies if using Yarn + # system('bin/yarn') + # puts "\n== Copying sample files ==" # unless File.exist?('config/database.yml') # cp 'config/database.yml.sample', 'config/database.yml' diff --git a/apps/workbench/bin/update b/apps/workbench/bin/update index b56771ece8..46aa76ca87 100755 --- a/apps/workbench/bin/update +++ b/apps/workbench/bin/update @@ -22,6 +22,9 @@ chdir APP_ROOT do system! 'gem install bundler --conservative' system('bundle check') || system!('bundle install') + # Install JavaScript dependencies if using Yarn + # system('bin/yarn') + puts "\n== Updating database ==" system! 'bin/rails db:migrate' diff --git a/apps/workbench/bin/yarn b/apps/workbench/bin/yarn new file mode 100755 index 0000000000..5fc7611952 --- /dev/null +++ b/apps/workbench/bin/yarn @@ -0,0 +1,15 @@ +#!/usr/bin/env ruby +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +APP_ROOT = File.expand_path('..', __dir__) +Dir.chdir(APP_ROOT) do + begin + exec "yarnpkg #{ARGV.join(" ")}" + rescue Errno::ENOENT + $stderr.puts "Yarn executable was not detected in the system." 
+ $stderr.puts "Download Yarn at https://yarnpkg.com/en/docs/install" + exit 1 + end +end diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml index 9456e61455..255ad44f85 100644 --- a/apps/workbench/config/application.default.yml +++ b/apps/workbench/config/application.default.yml @@ -77,7 +77,6 @@ test: action_mailer.delivery_method: :test active_support.deprecation: :stderr profiling_enabled: true - secret_token: <%= rand(2**256).to_s(36) %> secret_key_base: <%= rand(2**256).to_s(36) %> site_name: Workbench:test diff --git a/apps/workbench/config/application.rb b/apps/workbench/config/application.rb index e88229b851..42bf4da24b 100644 --- a/apps/workbench/config/application.rb +++ b/apps/workbench/config/application.rb @@ -2,13 +2,15 @@ # # SPDX-License-Identifier: AGPL-3.0 -require File.expand_path('../boot', __FILE__) +require_relative 'boot' require "rails" # Pick only the frameworks we need: require "active_model/railtie" require "active_job/railtie" require "active_record/railtie" +# Skip ActiveStorage (new in Rails 5.1) +# require "active_storage/engine" require "action_controller/railtie" require "action_mailer/railtie" require "action_view/railtie" @@ -28,6 +30,9 @@ module ArvadosWorkbench require_relative "arvados_config.rb" + # Initialize configuration defaults for originally generated Rails version. + config.load_defaults 5.1 + # Settings in config/environments/* take precedence over those specified here. # Application configuration should go into files in config/initializers # -- all .rb files in that directory are automatically loaded. 
diff --git a/apps/workbench/config/boot.rb b/apps/workbench/config/boot.rb index 8153266683..6add5911f6 100644 --- a/apps/workbench/config/boot.rb +++ b/apps/workbench/config/boot.rb @@ -8,6 +8,7 @@ require 'rubygems' ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__) require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE']) +require 'bootsnap/setup' # Speed up boot time by caching expensive operations. # Use ARVADOS_API_TOKEN environment variable (if set) in console require 'rails' diff --git a/apps/workbench/config/initializers/content_security_policy.rb b/apps/workbench/config/initializers/content_security_policy.rb new file mode 100644 index 0000000000..853ecdeec4 --- /dev/null +++ b/apps/workbench/config/initializers/content_security_policy.rb @@ -0,0 +1,29 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +# Be sure to restart your server when you modify this file. + +# Define an application-wide content security policy +# For further information see the following documentation +# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy + +# Rails.application.config.content_security_policy do |policy| +# policy.default_src :self, :https +# policy.font_src :self, :https, :data +# policy.img_src :self, :https, :data +# policy.object_src :none +# policy.script_src :self, :https +# policy.style_src :self, :https + +# # Specify URI for violation reports +# # policy.report_uri "/csp-violation-report-endpoint" +# end + +# If you are using UJS then enable automatic nonce generation +# Rails.application.config.content_security_policy_nonce_generator = -> request { SecureRandom.base64(16) } + +# Report CSP violations to a specified URI +# For further information see the following documentation: +# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only +# Rails.application.config.content_security_policy_report_only = true diff 
--git a/apps/workbench/config/initializers/new_framework_defaults.rb b/apps/workbench/config/initializers/new_framework_defaults.rb index b8dca33a37..2e2f0b1810 100644 --- a/apps/workbench/config/initializers/new_framework_defaults.rb +++ b/apps/workbench/config/initializers/new_framework_defaults.rb @@ -24,6 +24,3 @@ ActiveSupport.to_time_preserves_timezone = false # Require `belongs_to` associations by default. Previous versions had false. Rails.application.config.active_record.belongs_to_required_by_default = false - -# Do not halt callback chains when a callback returns false. Previous versions had true. -ActiveSupport.halt_callback_chains_on_return_false = true diff --git a/apps/workbench/config/initializers/new_framework_defaults_5_1.rb b/apps/workbench/config/initializers/new_framework_defaults_5_1.rb new file mode 100644 index 0000000000..804ee6f506 --- /dev/null +++ b/apps/workbench/config/initializers/new_framework_defaults_5_1.rb @@ -0,0 +1,18 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +# Be sure to restart your server when you modify this file. +# +# This file contains migration options to ease your Rails 5.1 upgrade. +# +# Once upgraded flip defaults one by one to migrate to the new default. +# +# Read the Guide for Upgrading Ruby on Rails for more info on each option. + +# Make `form_with` generate non-remote forms. +Rails.application.config.action_view.form_with_generates_remote_forms = false + +# Unknown asset fallback will return the path passed in when the given +# asset is not present in the asset pipeline. 
+# Rails.application.config.assets.unknown_asset_fallback = false diff --git a/apps/workbench/config/initializers/new_framework_defaults_5_2.rb b/apps/workbench/config/initializers/new_framework_defaults_5_2.rb new file mode 100644 index 0000000000..93a8d52406 --- /dev/null +++ b/apps/workbench/config/initializers/new_framework_defaults_5_2.rb @@ -0,0 +1,42 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +# Be sure to restart your server when you modify this file. +# +# This file contains migration options to ease your Rails 5.2 upgrade. +# +# Once upgraded flip defaults one by one to migrate to the new default. +# +# Read the Guide for Upgrading Ruby on Rails for more info on each option. + +# Make Active Record use stable #cache_key alongside new #cache_version method. +# This is needed for recyclable cache keys. +# Rails.application.config.active_record.cache_versioning = true + +# Use AES-256-GCM authenticated encryption for encrypted cookies. +# Also, embed cookie expiry in signed or encrypted cookies for increased security. +# +# This option is not backwards compatible with earlier Rails versions. +# It's best enabled when your entire app is migrated and stable on 5.2. +# +# Existing cookies will be converted on read then written with the new scheme. +# Rails.application.config.action_dispatch.use_authenticated_cookie_encryption = true + +# Use AES-256-GCM authenticated encryption as default cipher for encrypting messages +# instead of AES-256-CBC, when use_authenticated_message_encryption is set to true. +# Rails.application.config.active_support.use_authenticated_message_encryption = true + +# Add default protection from forgery to ActionController::Base instead of in +# ApplicationController. +# Rails.application.config.action_controller.default_protect_from_forgery = true + +# Store boolean values are in sqlite3 databases as 1 and 0 instead of 't' and +# 'f' after migrating old data. 
+# Rails.application.config.active_record.sqlite3.represent_boolean_as_integer = true + +# Use SHA-1 instead of MD5 to generate non-sensitive digests, such as the ETag header. +# Rails.application.config.active_support.use_sha1_digests = true + +# Make `form_with` generate id attributes for any generated HTML tags. +# Rails.application.config.action_view.form_with_generates_ids = true diff --git a/apps/workbench/config/routes.rb b/apps/workbench/config/routes.rb index 718adfd2ed..ffc09ac933 100644 --- a/apps/workbench/config/routes.rb +++ b/apps/workbench/config/routes.rb @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: AGPL-3.0 -ArvadosWorkbench::Application.routes.draw do +Rails.application.routes.draw do themes_for_rails resources :keep_disks diff --git a/apps/workbench/config/secrets.yml b/apps/workbench/config/secrets.yml index bc8a0d0de5..57399082e8 100644 --- a/apps/workbench/config/secrets.yml +++ b/apps/workbench/config/secrets.yml @@ -11,16 +11,16 @@ # no regular words or you'll be exposed to dictionary attacks. # You can use `rails secret` to generate a secure secret key. -# Make sure the secrets in this file are kept private -# if you're sharing your code publicly. +# NOTE that these get overriden by Arvados' own configuration system. -development: - secret_key_base: 33e2d171ec6c67cf8e9a9fbfadc1071328bdab761297e2fe28b9db7613dd542c1ba3bdb3bd3e636d1d6f74ab73a2d90c4e9c0ecc14fde8ccd153045f94e9cc41 +# development: +# secret_key_base: <%= rand(1<<255).to_s(36) %> -test: - secret_key_base: d4c07cab3530fccf5d86565ecdc359eb2a853b8ede3b06edb2885e4423d7a726f50a3e415bb940fd4861e8fec16459665fd377acc8cdd98ea63294d2e0d12bb2 +# test: +# secret_key_base: <%= rand(1<<255).to_s(36) %> -# Do not keep production secrets in the repository, -# instead read values from the environment. +# In case this doesn't get overriden for some reason, assign a random key +# to gracefully degrade by rejecting cookies instead of by opening a +# vulnerability. 
production: - secret_key_base: <%= ENV["SECRET_KEY_BASE"] %> + secret_key_base: <%= rand(1<<255).to_s(36) %> diff --git a/build/rails-package-scripts/README.md b/build/rails-package-scripts/README.md index 0d720bde14..2930957b94 100644 --- a/build/rails-package-scripts/README.md +++ b/build/rails-package-scripts/README.md @@ -12,7 +12,7 @@ Since our build process is a tower of shell scripts, concatenating files seemed postinst.sh lets the early parts define a few hooks to control behavior: -* After it installs the core configuration files (database.yml, application.yml, and production.rb) to /etc/arvados/server, it calls setup_extra_conffiles. By default this is a noop function (in step2.sh). API server defines this to set up the old omniauth.rb conffile. +* After it installs the core configuration files (database.yml, application.yml, and production.rb) to /etc/arvados/server, it calls setup_extra_conffiles. By default this is a noop function (in step2.sh). * Before it restarts nginx, it calls setup_before_nginx_restart. By default this is a noop function (in step2.sh). API server defines this to set up the internal git repository, if necessary. * $RAILSPKG_DATABASE_LOAD_TASK defines the Rake task to load the database. API server uses db:structure:load. SSO server uses db:schema:load. Workbench doesn't set this, which causes the postinst to skip all database work. * If $RAILSPKG_SUPPORTS_CONFIG_CHECK != 1, it won't run the config:check rake task. SSO clears this flag (it doesn't have that task code). 
diff --git a/build/rails-package-scripts/arvados-api-server.sh b/build/rails-package-scripts/arvados-api-server.sh index 82bc9898aa..027383ab4f 100644 --- a/build/rails-package-scripts/arvados-api-server.sh +++ b/build/rails-package-scripts/arvados-api-server.sh @@ -12,7 +12,9 @@ DOC_URL="http://doc.arvados.org/install/install-api-server.html#configure" RAILSPKG_DATABASE_LOAD_TASK=db:structure:load setup_extra_conffiles() { - setup_conffile initializers/omniauth.rb + # Rails 5.2 does not tolerate dangling symlinks in the initializers directory, and this one + # can still be there, left over from a previous version of the API server package. + rm -f $RELEASE_PATH/config/initializers/omniauth.rb } setup_before_nginx_restart() { diff --git a/build/rails-package-scripts/prerm.sh b/build/rails-package-scripts/prerm.sh index 9816b14c6d..6773b6f308 100644 --- a/build/rails-package-scripts/prerm.sh +++ b/build/rails-package-scripts/prerm.sh @@ -10,7 +10,6 @@ remove () { rm -f $RELEASE_PATH/config/environments/production.rb rm -f $RELEASE_PATH/config/application.yml # Old API server configuration file. - rm -f $RELEASE_PATH/config/initializers/omniauth.rb rm -rf $RELEASE_PATH/public/assets/ rm -rf $RELEASE_PATH/tmp rm -rf $RELEASE_PATH/.bundle diff --git a/build/run-build-docker-jobs-image.sh b/build/run-build-docker-jobs-image.sh index ec8357701d..d1fb2ac670 100755 --- a/build/run-build-docker-jobs-image.sh +++ b/build/run-build-docker-jobs-image.sh @@ -185,28 +185,23 @@ if docker --version |grep " 1\.[0-9]\." ; then FORCE=-f fi -#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig - if ! [[ -z "$version_tag" ]]; then docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag" -else - docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:latest -fi + ECODE=$? -ECODE=$? 
+ if [[ "$ECODE" != "0" ]]; then + EXITCODE=$(($EXITCODE + $ECODE)) + fi -if [[ "$ECODE" != "0" ]]; then - EXITCODE=$(($EXITCODE + $ECODE)) + checkexit $ECODE "docker tag" + title "docker tag complete (`timer`)" fi -checkexit $ECODE "docker tag" -title "docker tag complete (`timer`)" - title "uploading images" timer_reset -if [[ "$ECODE" != "0" ]]; then +if [[ "$EXITCODE" != "0" ]]; then title "upload arvados images SKIPPED because build or tag failed" else if [[ $upload == true ]]; then @@ -217,7 +212,6 @@ else docker_push arvados/jobs:"$version_tag" else docker_push arvados/jobs:$cwl_runner_version_orig - docker_push arvados/jobs:latest fi title "upload arvados images finished (`timer`)" else diff --git a/build/run-build-packages-python-and-ruby.sh b/build/run-build-packages-python-and-ruby.sh index 66201b3b4d..f3b7564d71 100755 --- a/build/run-build-packages-python-and-ruby.sh +++ b/build/run-build-packages-python-and-ruby.sh @@ -192,7 +192,6 @@ PYTHON_BUILD_FAILURES=0 if [ $PYTHON -eq 1 ]; then debug_echo "Building Python packages" python_wrapper arvados-python-client "$WORKSPACE/sdk/python" - python_wrapper arvados-pam "$WORKSPACE/sdk/pam" python_wrapper arvados-cwl-runner "$WORKSPACE/sdk/cwl" python_wrapper arvados_fuse "$WORKSPACE/services/fuse" diff --git a/build/run-tests.sh b/build/run-tests.sh index f35ae5bc13..4d76589336 100755 --- a/build/run-tests.sh +++ b/build/run-tests.sh @@ -198,7 +198,7 @@ sanity_checks() { ( [[ -n "$WORKSPACE" ]] && [[ -d "$WORKSPACE/services" ]] ) \ || fatal "WORKSPACE environment variable not set to a source directory (see: $0 --help)" [[ -z "$CONFIGSRC" ]] || [[ -s "$CONFIGSRC/config.yml" ]] \ - || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)" + || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)" echo Checking dependencies: echo "locale: ${LANG}" [[ "$(locale charmap)" = "UTF-8" ]] \ @@ -376,7 +376,7 @@ if [[ 
${skip["sdk/R"]} == 1 && ${skip["doc"]} == 1 ]]; then fi if [[ $NEED_SDK_R == false ]]; then - echo "R SDK not needed, it will not be installed." + echo "R SDK not needed, it will not be installed." fi checkpidfile() { @@ -417,11 +417,11 @@ start_services() { . "$VENVDIR/bin/activate" echo 'Starting API, controller, keepproxy, keep-web, arv-git-httpd, ws, and nginx ssl proxy...' if [[ ! -d "$WORKSPACE/services/api/log" ]]; then - mkdir -p "$WORKSPACE/services/api/log" + mkdir -p "$WORKSPACE/services/api/log" fi # Remove empty api.pid file if it exists if [[ -f "$WORKSPACE/tmp/api.pid" && ! -s "$WORKSPACE/tmp/api.pid" ]]; then - rm -f "$WORKSPACE/tmp/api.pid" + rm -f "$WORKSPACE/tmp/api.pid" fi all_services_stopped= fail=1 @@ -820,19 +820,19 @@ do_test_once() { check_arvados_config() { if [[ "$1" = "env" ]] ; then - return + return fi if [[ -z "$ARVADOS_CONFIG" ]] ; then - # Create config file. The run_test_server script requires PyYAML, - # so virtualenv needs to be active. Downstream steps like - # workbench install which require a valid config.yml. - if [[ ! -s "$VENVDIR/bin/activate" ]] ; then - install_env - fi - . "$VENVDIR/bin/activate" + # Create config file. The run_test_server script requires PyYAML, + # so virtualenv needs to be active. Downstream steps like + # workbench install which require a valid config.yml. + if [[ ! -s "$VENVDIR/bin/activate" ]] ; then + install_env + fi + . "$VENVDIR/bin/activate" cd "$WORKSPACE" - eval $(python sdk/python/tests/run_test_server.py setup_config) - deactivate + eval $(python sdk/python/tests/run_test_server.py setup_config) + deactivate fi } @@ -1015,7 +1015,7 @@ test_doc() { ( set -e cd "$WORKSPACE/doc" - ARVADOS_API_HOST=qr1hi.arvadosapi.com + ARVADOS_API_HOST=pirca.arvadosapi.com # Make sure python-epydoc is installed or the next line won't # do much good! 
PYTHONPATH=$WORKSPACE/sdk/python/ "$bundle" exec rake linkchecker baseurl=file://$WORKSPACE/doc/.site/ arvados_workbench_host=https://workbench.$ARVADOS_API_HOST arvados_api_host=$ARVADOS_API_HOST diff --git a/doc/_config.yml b/doc/_config.yml index bbab3f307e..968ca51fa9 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -27,38 +27,37 @@ navbar: - Run a workflow using Workbench: - user/getting_started/workbench.html.textile.liquid - user/tutorials/tutorial-workflow-workbench.html.textile.liquid - - user/composer/composer.html.textile.liquid + - Working at the Command Line: + - user/getting_started/setup-cli.html.textile.liquid + - user/reference/api-tokens.html.textile.liquid + - user/getting_started/check-environment.html.textile.liquid - Access an Arvados virtual machine: - user/getting_started/vm-login-with-webshell.html.textile.liquid - user/getting_started/ssh-access-unix.html.textile.liquid - user/getting_started/ssh-access-windows.html.textile.liquid - - user/getting_started/check-environment.html.textile.liquid - - user/reference/api-tokens.html.textile.liquid - Working with data sets: - user/tutorials/tutorial-keep.html.textile.liquid - user/tutorials/tutorial-keep-get.html.textile.liquid - user/tutorials/tutorial-keep-mount-gnu-linux.html.textile.liquid - user/tutorials/tutorial-keep-mount-os-x.html.textile.liquid - user/tutorials/tutorial-keep-mount-windows.html.textile.liquid - - user/topics/keep.html.textile.liquid - user/tutorials/tutorial-keep-collection-lifecycle.html.textile.liquid - user/topics/arv-copy.html.textile.liquid - - user/topics/storage-classes.html.textile.liquid - user/topics/collection-versioning.html.textile.liquid - - Working with git repositories: - - user/tutorials/add-new-repository.html.textile.liquid - - user/tutorials/git-arvados-guide.html.textile.liquid - - Running workflows at the command line: + - user/topics/storage-classes.html.textile.liquid + - user/topics/keep.html.textile.liquid + - Data Analysis with Workflows: 
- user/cwl/cwl-runner.html.textile.liquid - user/cwl/cwl-run-options.html.textile.liquid - - Develop an Arvados workflow: - - user/tutorials/intro-crunch.html.textile.liquid - user/tutorials/writing-cwl-workflow.html.textile.liquid + - user/topics/arv-docker.html.textile.liquid - user/cwl/cwl-style.html.textile.liquid - - user/cwl/federated-workflows.html.textile.liquid - user/cwl/cwl-extensions.html.textile.liquid + - user/cwl/federated-workflows.html.textile.liquid - user/cwl/cwl-versions.html.textile.liquid - - user/topics/arv-docker.html.textile.liquid + - Working with git repositories: + - user/tutorials/add-new-repository.html.textile.liquid + - user/tutorials/git-arvados-guide.html.textile.liquid - Reference: - user/topics/link-accounts.html.textile.liquid - user/reference/cookbook.html.textile.liquid @@ -75,8 +74,9 @@ navbar: - sdk/python/example.html.textile.liquid - sdk/python/python.html.textile.liquid - sdk/python/arvados-fuse.html.textile.liquid - - sdk/python/events.html.textile.liquid + - sdk/python/arvados-cwl-runner.html.textile.liquid - sdk/python/cookbook.html.textile.liquid + - sdk/python/events.html.textile.liquid - CLI: - sdk/cli/install.html.textile.liquid - sdk/cli/index.html.textile.liquid @@ -162,6 +162,7 @@ navbar: - admin/migrating-providers.html.textile.liquid - user/topics/arvados-sync-groups.html.textile.liquid - admin/scoped-tokens.html.textile.liquid + - admin/token-expiration-policy.html.textile.liquid - Monitoring: - admin/logging.html.textile.liquid - admin/metrics.html.textile.liquid diff --git a/doc/_includes/_0_filter_py.liquid b/doc/_includes/_0_filter_py.liquid deleted file mode 100644 index ff055dbb59..0000000000 --- a/doc/_includes/_0_filter_py.liquid +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. 
- -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -# Import the Arvados sdk module -import arvados - -# Get information about the task from the environment -this_task = arvados.current_task() - -this_task_input = arvados.current_job()['script_parameters']['input'] - -# Create the object access to the collection referred to in the input -collection = arvados.CollectionReader(this_task_input) - -# Create an object to write a new collection as output -out = arvados.CollectionWriter() - -# Create a new file in the output collection -with out.open('0-filter.txt') as out_file: - # Iterate over every input file in the input collection - for input_file in collection.all_files(): - # Output every line in the file that starts with '0' - out_file.writelines(line for line in input_file if line.startswith('0')) - -# Commit the output to Keep. -output_locator = out.finish() - -# Use the resulting locator as the output for this task. -this_task.set_output(output_locator) - -# Done! diff --git a/doc/_includes/_alert-incomplete.liquid b/doc/_includes/_alert-incomplete.liquid deleted file mode 100644 index 8a62ec7a8e..0000000000 --- a/doc/_includes/_alert-incomplete.liquid +++ /dev/null @@ -1,11 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -
- -

Hi!

-

This section is incomplete. Please be patient with us as we fill in the blanks — or contribute to the documentation project.

-
diff --git a/doc/_includes/_alert_stub.liquid b/doc/_includes/_alert_stub.liquid deleted file mode 100644 index dd56f17ddf..0000000000 --- a/doc/_includes/_alert_stub.liquid +++ /dev/null @@ -1,11 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -
- -

Hi!

-

This section is incomplete. Please be patient with us as we fill in the blanks — or contribute to the documentation project.

-
diff --git a/doc/_includes/_arv_copy_expectations.liquid b/doc/_includes/_arv_copy_expectations.liquid deleted file mode 100644 index 2231b06a73..0000000000 --- a/doc/_includes/_arv_copy_expectations.liquid +++ /dev/null @@ -1,12 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{% include 'notebox_begin' %} -As stated above, arv-copy is recursive by default and requires a working git repository in the destination cluster. If you do not have a repository created, you can follow the "Adding a new repository":{{site.baseurl}}/user/tutorials/add-new-repository.html page. We will use the *tutorial* repository created in that page as the example. - -
In addition, arv-copy requires git when copying to a git repository. Please make sure that git is installed and available. - -{% include 'notebox_end' %} diff --git a/doc/_includes/_concurrent_hash_script_py.liquid b/doc/_includes/_concurrent_hash_script_py.liquid deleted file mode 100644 index 2c55298841..0000000000 --- a/doc/_includes/_concurrent_hash_script_py.liquid +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/env python -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -import hashlib -import os -import arvados - -# Jobs consist of one or more tasks. A task is a single invocation of -# a crunch script. - -# Get the current task -this_task = arvados.current_task() - -# Tasks have a sequence number for ordering. All tasks -# with the current sequence number must finish successfully -# before tasks in the next sequence are started. -# The first task has sequence number 0 -if this_task['sequence'] == 0: - # Get the "input" field from "script_parameters" on the task object - job_input = arvados.current_job()['script_parameters']['input'] - - # Create a collection reader to read the input - cr = arvados.CollectionReader(job_input) - - # Loop over each stream in the collection (a stream is a subset of - # files that logically represents a directory) - for s in cr.all_streams(): - - # Loop over each file in the stream - for f in s.all_files(): - - # Synthesize a manifest for just this file - task_input = f.as_manifest() - - # Set attributes for a new task: - # 'job_uuid' the job that this task is part of - # 'created_by_job_task_uuid' this task that is creating the new task - # 'sequence' the sequence number of the new task - # 'parameters' the parameters to be passed to the new task - new_task_attrs = { - 'job_uuid': arvados.current_job()['uuid'], - 'created_by_job_task_uuid': arvados.current_task()['uuid'], - 'sequence': 1, - 'parameters': { - 'input':task_input - } - } - - # Ask the 
Arvados API server to create a new task, running the same - # script as the parent task specified in 'created_by_job_task_uuid' - arvados.api().job_tasks().create(body=new_task_attrs).execute() - - # Now tell the Arvados API server that this task executed successfully, - # even though it doesn't have any output. - this_task.set_output(None) -else: - # The task sequence was not 0, so it must be a parallel worker task - # created by the first task - - # Instead of getting "input" from the "script_parameters" field of - # the job object, we get it from the "parameters" field of the - # task object - this_task_input = this_task['parameters']['input'] - - collection = arvados.CollectionReader(this_task_input) - - # There should only be one file in the collection, so get the - # first one from the all files iterator. - input_file = next(collection.all_files()) - output_path = os.path.normpath(os.path.join(input_file.stream_name(), - input_file.name)) - - # Everything after this is the same as the first tutorial. - digestor = hashlib.new('md5') - for buf in input_file.readall(): - digestor.update(buf) - - out = arvados.CollectionWriter() - with out.open('md5sum.txt') as out_file: - out_file.write("{} {}\n".format(digestor.hexdigest(), output_path)) - - this_task.set_output(out.finish()) - -# Done! diff --git a/doc/_includes/_crunch1only_begin.liquid b/doc/_includes/_crunch1only_begin.liquid deleted file mode 100644 index 6dc304a928..0000000000 --- a/doc/_includes/_crunch1only_begin.liquid +++ /dev/null @@ -1,8 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{% include 'notebox_begin_warning' %} -This section assumes the legacy Jobs API is available. Some newer installations have already disabled the Jobs API in favor of the Containers API. 
diff --git a/doc/_includes/_example_docker.liquid b/doc/_includes/_example_docker.liquid deleted file mode 100644 index 2d6335a62b..0000000000 --- a/doc/_includes/_example_docker.liquid +++ /dev/null @@ -1,34 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{ - "name": "Example using R in a custom Docker image", - "components": { - "Rscript": { - "script": "run-command", - "script_version": "master", - "repository": "arvados", - "script_parameters": { - "command": [ - "Rscript", - "$(glob $(file $(myscript))/*.r)", - "$(glob $(dir $(mydata))/*.csv)" - ], - "myscript": { - "required": true, - "dataclass": "Collection" - }, - "mydata": { - "required": true, - "dataclass": "Collection" - } - }, - "runtime_constraints": { - "docker_image": "arvados/jobs-with-r" - } - } - } -} diff --git a/doc/_includes/_install_compute_docker.liquid b/doc/_includes/_install_compute_docker.liquid index 63c54aed72..fd5d88a9c3 100644 --- a/doc/_includes/_install_compute_docker.liquid +++ b/doc/_includes/_install_compute_docker.liquid @@ -6,7 +6,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0 h2(#cgroups). Configure Linux cgroups accounting -Linux can report what compute resources are used by processes in a specific cgroup or Docker container. Crunch can use these reports to share that information with users running compute work. This can help pipeline authors debug and optimize their workflows. +Linux can report what compute resources are used by processes in a specific cgroup or Docker container. Crunch can use these reports to share that information with users running compute work. This can help workflow authors debug and optimize their workflows. To enable cgroups accounting, you must boot Linux with the command line parameters @cgroup_enable=memory swapaccount=1@. 
diff --git a/doc/_includes/_install_git.liquid b/doc/_includes/_install_git.liquid deleted file mode 100644 index d60379fa30..0000000000 --- a/doc/_includes/_install_git.liquid +++ /dev/null @@ -1,9 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{% include 'notebox_begin' %} -The Arvados API and Git servers require Git 1.7.10 or later. -{% include 'notebox_end' %} diff --git a/doc/_includes/_install_rails_reconfigure.liquid b/doc/_includes/_install_rails_reconfigure.liquid deleted file mode 100644 index 4687431c2a..0000000000 --- a/doc/_includes/_install_rails_reconfigure.liquid +++ /dev/null @@ -1,17 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -Now that all your configuration is in place, rerun the {{railspkg}} package configuration to install necessary Ruby Gems and other server dependencies. On Debian-based systems: - -
~$ sudo dpkg-reconfigure {{railspkg}}
-
- -On Red Hat-based systems: - -
~$ sudo yum reinstall {{railspkg}}
-
- -You only need to do this manual step once, after initial configuration. When you make configuration changes in the future, you just need to restart Nginx for them to take effect. \ No newline at end of file diff --git a/doc/_includes/_install_ruby_and_bundler_sso.liquid b/doc/_includes/_install_ruby_and_bundler_sso.liquid deleted file mode 100644 index a8d14efdee..0000000000 --- a/doc/_includes/_install_ruby_and_bundler_sso.liquid +++ /dev/null @@ -1,69 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -Ruby 2.3 is recommended; Ruby 2.1 is also known to work. - -h4(#rvm). *Option 1: Install with RVM* - - -
sudo gpg --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
-\curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.3
-
- -Either log out and log back in to activate RVM, or explicitly load it in all open shells like this: - - -
source /usr/local/rvm/scripts/rvm
-
- -Once RVM is activated in your shell, install Bundler: - - -
~$ gem install bundler
-
- -h4(#fromsource). *Option 2: Install from source* - -Install prerequisites for Debian 8: - - -
sudo apt-get install \
-    bison build-essential gettext libcurl3 libcurl3-gnutls \
-    libcurl4-openssl-dev libpcre3-dev libreadline-dev \
-    libssl-dev libxslt1.1 zlib1g-dev
-
- -Install prerequisites for CentOS 7: - - -
sudo yum install \
-    libyaml-devel glibc-headers autoconf gcc-c++ glibc-devel \
-    patch readline-devel zlib-devel libffi-devel openssl-devel \
-    make automake libtool bison sqlite-devel tar
-
- -Install prerequisites for Ubuntu 12.04 or 14.04: - - -
sudo apt-get install \
-    gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
-    libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
-    libncurses5-dev automake libtool bison pkg-config libffi-dev curl
-
- -Build and install Ruby: - - -
mkdir -p ~/src
-cd ~/src
-curl -f http://cache.ruby-lang.org/pub/ruby/2.3/ruby-2.3.3.tar.gz | tar xz
-cd ruby-2.3.3
-./configure --disable-install-rdoc
-make
-sudo make install
-
-sudo -i gem install bundler
-
diff --git a/doc/_includes/_install_runit.liquid b/doc/_includes/_install_runit.liquid deleted file mode 100644 index d5f8341311..0000000000 --- a/doc/_includes/_install_runit.liquid +++ /dev/null @@ -1,19 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -On Debian-based systems: - - -
~$ sudo apt-get install runit
-
-
- -On Red Hat-based systems: - - -
~$ sudo yum install runit
-
-
diff --git a/doc/_includes/_pipeline_deprecation_notice.liquid b/doc/_includes/_pipeline_deprecation_notice.liquid deleted file mode 100644 index 35c89be447..0000000000 --- a/doc/_includes/_pipeline_deprecation_notice.liquid +++ /dev/null @@ -1,9 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{% include 'notebox_begin_warning' %} -Arvados pipeline templates are deprecated. The recommended way to develop new workflows for Arvados is using the "Common Workflow Language":{{site.baseurl}}/user/cwl/cwl-runner.html. -{% include 'notebox_end' %} diff --git a/doc/_includes/_run_command_foreach_example.liquid b/doc/_includes/_run_command_foreach_example.liquid deleted file mode 100644 index 8e3dd713d1..0000000000 --- a/doc/_includes/_run_command_foreach_example.liquid +++ /dev/null @@ -1,46 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{ - "name":"run-command example pipeline", - "components":{ - "bwa-mem": { - "script": "run-command", - "script_version": "master", - "repository": "arvados", - "script_parameters": { - "command": [ - "bwa", - "mem", - "-t", - "$(node.cores)", - "$(glob $(dir $(reference_collection))/*.fasta)", - { - "foreach": "read_pair", - "command": "$(read_pair)" - } - ], - "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam", - "task.foreach": ["sample_subdir", "read_pair"], - "reference_collection": { - "required": true, - "dataclass": "Collection" - }, - "sample": { - "required": true, - "dataclass": "Collection" - }, - "sample_subdir": "$(dir $(sample))", - "read_pair": { - "value": { - "group": "sample_subdir", - "regex": "(.*)_[12]\\.fastq(\\.gz)?$" - } - } - } - } - } -} diff --git a/doc/_includes/_run_command_simple_example.liquid b/doc/_includes/_run_command_simple_example.liquid deleted file mode 100644 index b37ae9a88b..0000000000 --- 
a/doc/_includes/_run_command_simple_example.liquid +++ /dev/null @@ -1,43 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{ - "name":"run-command example pipeline", - "components":{ - "bwa-mem": { - "script": "run-command", - "script_version": "master", - "repository": "arvados", - "script_parameters": { - "command": [ - "$(dir $(bwa_collection))/bwa", - "mem", - "-t", - "$(node.cores)", - "-R", - "@RG\\\tID:group_id\\\tPL:illumina\\\tSM:sample_id", - "$(glob $(dir $(reference_collection))/*.fasta)", - "$(glob $(dir $(sample))/*_1.fastq)", - "$(glob $(dir $(sample))/*_2.fastq)" - ], - "reference_collection": { - "required": true, - "dataclass": "Collection" - }, - "bwa_collection": { - "required": true, - "dataclass": "Collection", - "default": "39c6f22d40001074f4200a72559ae7eb+5745" - }, - "sample": { - "required": true, - "dataclass": "Collection" - }, - "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam" - } - } - } -} diff --git a/doc/_includes/_run_md5sum_py.liquid b/doc/_includes/_run_md5sum_py.liquid deleted file mode 100644 index 6d10672db6..0000000000 --- a/doc/_includes/_run_md5sum_py.liquid +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env python -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -import arvados - -# Automatically parallelize this job by running one task per file. 
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, - input_as_path=True) - -# Get the input file for the task -input_file = arvados.get_task_param_mount('input') - -# Run the external 'md5sum' program on the input file -stdoutdata, stderrdata = arvados.util.run_command(['md5sum', input_file]) - -# Save the standard output (stdoutdata) to "md5sum.txt" in the output collection -out = arvados.CollectionWriter() -with out.open('md5sum.txt') as out_file: - out_file.write(stdoutdata) -arvados.current_task().set_output(out.finish()) diff --git a/doc/_includes/_ssh_addkey.liquid b/doc/_includes/_ssh_addkey.liquid index 7a8a992b68..de0da6a767 100644 --- a/doc/_includes/_ssh_addkey.liquid +++ b/doc/_includes/_ssh_addkey.liquid @@ -18,6 +18,10 @@ Paste your public key into the text area labeled *Public Key*, and click on the h1(#login). Using SSH to log into an Arvados VM -To see a list of virtual machines that you have access to and determine the name and login information, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu and click on the menu item *Virtual machines* to go to the Virtual machines page. This page lists the virtual machines you can access. The *Host name* column lists the name of each available VM. The *Login name* column will have a list of comma separated values of the form @you@. In this guide the hostname will be *_shell_* and the login will be *_you_*. Replace these with your hostname and login name as appropriate. +To see a list of virtual machines that you have access to, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, then click on the menu item *Virtual machines* to go to the Virtual machines page. +This page lists the virtual machines you can access. The *Host name* column lists the name of each available VM. The *Login name* column lists your login name on that VM. 
The *Command line* column provides a sample @ssh@ command line. +At the bottom of the page there may be additional instructions for connecting to your specific Arvados instance. If so, follow your site-specific instructions. If there are no site-specific instructions, you can probably connect directly with @ssh@. + +The following are generic instructions. In the examples the login will be *_you_* and the hostname will be *_shell.ClusterID.example.com_*. Replace these with your login name and hostname as appropriate. diff --git a/doc/_includes/_tutorial_bwa_sortsam_pipeline.liquid b/doc/_includes/_tutorial_bwa_sortsam_pipeline.liquid deleted file mode 100644 index 3b39403488..0000000000 --- a/doc/_includes/_tutorial_bwa_sortsam_pipeline.liquid +++ /dev/null @@ -1,78 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{ - "name": "Tutorial align using bwa mem and SortSam", - "components": { - "bwa-mem": { - "script": "run-command", - "script_version": "master", - "repository": "arvados", - "script_parameters": { - "command": [ - "$(dir $(bwa_collection))/bwa", - "mem", - "-t", - "$(node.cores)", - "-R", - "@RG\\\tID:group_id\\\tPL:illumina\\\tSM:sample_id", - "$(glob $(dir $(reference_collection))/*.fasta)", - "$(glob $(dir $(sample))/*_1.fastq)", - "$(glob $(dir $(sample))/*_2.fastq)" - ], - "reference_collection": { - "required": true, - "dataclass": "Collection" - }, - "bwa_collection": { - "required": true, - "dataclass": "Collection", - "default": "39c6f22d40001074f4200a72559ae7eb+5745" - }, - "sample": { - "required": true, - "dataclass": "Collection" - }, - "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam" - }, - "runtime_constraints": { - "docker_image": "bcosc/arv-base-java", - "arvados_sdk_version": "master" - } - }, - "SortSam": { - "script": "run-command", - "script_version": "847459b3c257aba65df3e0cbf6777f7148542af2", - "repository": "arvados", - 
"script_parameters": { - "command": [ - "java", - "-Xmx4g", - "-Djava.io.tmpdir=$(tmpdir)", - "-jar", - "$(dir $(picard))/SortSam.jar", - "CREATE_INDEX=True", - "SORT_ORDER=coordinate", - "VALIDATION_STRINGENCY=LENIENT", - "INPUT=$(glob $(dir $(input))/*.sam)", - "OUTPUT=$(basename $(glob $(dir $(input))/*.sam)).sort.bam" - ], - "input": { - "output_of": "bwa-mem" - }, - "picard": { - "required": true, - "dataclass": "Collection", - "default": "88447c464574ad7f79e551070043f9a9+1970" - } - }, - "runtime_constraints": { - "docker_image": "bcosc/arv-base-java", - "arvados_sdk_version": "master" - } - } - } -} diff --git a/doc/_includes/_tutorial_cluster_name.liquid b/doc/_includes/_tutorial_cluster_name.liquid deleted file mode 100644 index 22fbc463e6..0000000000 --- a/doc/_includes/_tutorial_cluster_name.liquid +++ /dev/null @@ -1,9 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{% include 'notebox_begin' %} -This tutorial assumes you are using the default Arvados instance, @qr1hi@. If you are using a different instance, replace @qr1hi@ with your instance. See "Accessing Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html for more details. 
-{% include 'notebox_end' %} diff --git a/doc/_includes/_tutorial_expectations.liquid b/doc/_includes/_tutorial_expectations.liquid index 6c4fbeb1f3..09b18f0d4d 100644 --- a/doc/_includes/_tutorial_expectations.liquid +++ b/doc/_includes/_tutorial_expectations.liquid @@ -5,5 +5,5 @@ SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} {% include 'notebox_begin' %} -This tutorial assumes that you are logged into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "FUSE Driver":{{site.baseurl}}/sdk/python/arvados-fuse.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html +This tutorial assumes that you have access to the "Arvados command line tools":/user/getting_started/setup-cli.html and have set the "API token":{{site.baseurl}}/user/reference/api-tokens.html and confirmed a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html . {% include 'notebox_end' %} diff --git a/doc/_includes/_tutorial_hash_script_py.liquid b/doc/_includes/_tutorial_hash_script_py.liquid deleted file mode 100644 index 9eacb763dc..0000000000 --- a/doc/_includes/_tutorial_hash_script_py.liquid +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -import hashlib # Import the hashlib module to compute MD5. -import os # Import the os module for basic path manipulation -import arvados # Import the Arvados sdk module - -# Automatically parallelize this job by running one task per file. 
-# This means that if the input consists of many files, each file will -# be processed in parallel on different nodes enabling the job to -# be completed quicker. -arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True, - input_as_path=True) - -# Get object representing the current task -this_task = arvados.current_task() - -# Create the message digest object that will compute the MD5 hash -digestor = hashlib.new('md5') - -# Get the input file for the task -input_id, input_path = this_task['parameters']['input'].split('/', 1) - -# Open the input collection -input_collection = arvados.CollectionReader(input_id) - -# Open the input file for reading -with input_collection.open(input_path) as input_file: - for buf in input_file.readall(): # Iterate the file's data blocks - digestor.update(buf) # Update the MD5 hash object - -# Write a new collection as output -out = arvados.CollectionWriter() - -# Write an output file with one line: the MD5 value and input path -with out.open('md5sum.txt') as out_file: - out_file.write("{} {}/{}\n".format(digestor.hexdigest(), input_id, - os.path.normpath(input_path))) - -# Commit the output to Keep. -output_locator = out.finish() - -# Use the resulting locator as the output for this task. -this_task.set_output(output_locator) - -# Done! diff --git a/doc/_includes/_crunch1only_end.liquid b/doc/_includes/_tutorial_hello_cwl.liquid similarity index 50% rename from doc/_includes/_crunch1only_end.liquid rename to doc/_includes/_tutorial_hello_cwl.liquid index a3f2278fd3..ae1ec80ab2 100644 --- a/doc/_includes/_crunch1only_end.liquid +++ b/doc/_includes/_tutorial_hello_cwl.liquid @@ -1,7 +1,11 @@ +#!/usr/bin/env cwl-runner {% comment %} Copyright (C) The Arvados Authors. All rights reserved. 
SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} - -{% include 'notebox_end' %} +cwlVersion: v1.0 +class: CommandLineTool +inputs: [] +outputs: [] +arguments: ["echo", "hello world!"] diff --git a/doc/_includes/_tutorial_submit_job.liquid b/doc/_includes/_tutorial_submit_job.liquid deleted file mode 100644 index 548a6196a4..0000000000 --- a/doc/_includes/_tutorial_submit_job.liquid +++ /dev/null @@ -1,25 +0,0 @@ -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -{ - "name":"My md5 pipeline", - "components":{ - "do_hash":{ - "repository":"$USER/$USER", - "script":"hash.py", - "script_version":"master", - "runtime_constraints":{ - "docker_image":"arvados/jobs" - }, - "script_parameters":{ - "input":{ - "required": true, - "dataclass": "Collection" - } - } - } - } -} diff --git a/doc/_includes/_what_is_cwl.liquid b/doc/_includes/_what_is_cwl.liquid index fea11b3a32..7fbe7b8156 100644 --- a/doc/_includes/_what_is_cwl.liquid +++ b/doc/_includes/_what_is_cwl.liquid @@ -4,4 +4,4 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the primary way to develop and run workflows for Arvados. Arvados supports versions "v1.0":http://commonwl.org/v1.0 and "v1.1":http://commonwl.org/v1.1 of the CWL specification. +The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the primary way to develop and run workflows for Arvados. Arvados supports versions "v1.0":http://commonwl.org/v1.0 , "v1.1":http://commonwl.org/v1.1 and "v1.2":http://commonwl.org/v1.2 of the CWL standard. 
diff --git a/doc/admin/scoped-tokens.html.textile.liquid b/doc/admin/scoped-tokens.html.textile.liquid index 5bad5f25b3..18578a78d6 100644 --- a/doc/admin/scoped-tokens.html.textile.liquid +++ b/doc/admin/scoped-tokens.html.textile.liquid @@ -4,6 +4,12 @@ navsection: admin title: Securing API access with scoped tokens ... +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + By default, Arvados API tokens grant unlimited access to a user account, and admin account tokens have unlimited access to the whole system. If you want to grant restricted access to a user account, you can create a "scoped token" which is an Arvados API token which is limited to accessing specific APIs. One use of token scopes is to grant access to data, such as a collection, to users who do not have an Arvados accounts on your cluster. This is done by creating scoped token that only allows getting a specific record. An example of this is "creating a collection sharing link.":{{site.baseurl}}/sdk/python/cookbook.html#sharing_link diff --git a/doc/admin/token-expiration-policy.html.textile.liquid b/doc/admin/token-expiration-policy.html.textile.liquid new file mode 100644 index 0000000000..f5ee61b181 --- /dev/null +++ b/doc/admin/token-expiration-policy.html.textile.liquid @@ -0,0 +1,62 @@ +--- +layout: default +navsection: admin +title: Setting token expiration policy +... + +{% comment %} +Copyright (C) The Arvados Authors. All rights reserved. + +SPDX-License-Identifier: CC-BY-SA-3.0 +{% endcomment %} + +When a user logs in to Workbench, they receive a newly created token that grants access to the Arvados API on behalf of that user. By default, this token does not expire until the user explicitly logs off. + +Security policies, such as for GxP Compliance, may require that tokens expire by default in order to limit the risk associated with a token being leaked. 
+ +The @Login.TokenLifetime@ configuration enables the administrator to set an expiration lifetime for tokens granted through the login flow. + +h2. Setting token expiration + +Suppose that the organization's security policy requires that user sessions should not be valid for more than 12 hours. In that case, the cluster configuration should be set like the following: + +
+Clusters:
+  zzzzz:
+    ...
+    Login:
+      TokenLifetime: 12h
+    ...
+
+ +With this configuration, users will have to re-login every 12 hours. + +When this configuration is active, the workbench client will also be "untrusted" by default. This means tokens issued to workbench cannot be used to list other tokens issued to the user, and cannot be used to grant new tokens. This stops an attacker from leveraging a leaked token to acquire other tokens. + +The default @TokenLifetime@ is zero, which disables this feature. + +h2. Applying policy to existing tokens + +If you have an existing Arvados installation and want to set a token lifetime policy, there may be user tokens already granted. The administrator can use the following @rake@ tasks to enforce the new policy. + +The @db:check_long_lived_tokens@ task will list which users have tokens with no expiration date. + + +
# bundle exec rake db:check_long_lived_tokens
+Found 6 long-lived tokens from users:
+user2,user2@example.com,zzzzz-tpzed-5vzt5wc62k46p6r
+admin,admin@example.com,zzzzz-tpzed-6drplgwq9nm5cox
+user1,user1@example.com,zzzzz-tpzed-ftz2tfurbpf7xox
+
+
+ +To apply the new policy to existing tokens, use the @db:fix_long_lived_tokens@ task. + + +
# bundle exec rake db:fix_long_lived_tokens
+Setting token expiration to: 2020-08-25 03:30:50 +0000
+6 tokens updated.
+
+
+ +NOTE: These rake tasks adjust the expiration of all tokens except those belonging to the system root user (@zzzzz-tpzed-000000000000000@). If you have tokens used by automated service accounts that need to be long-lived, you can "create tokens that don't expire using the command line":user-management-cli.html#create-token . diff --git a/doc/admin/user-management-cli.html.textile.liquid b/doc/admin/user-management-cli.html.textile.liquid index 33969ea8f8..6892176604 100644 --- a/doc/admin/user-management-cli.html.textile.liquid +++ b/doc/admin/user-management-cli.html.textile.liquid @@ -16,7 +16,7 @@ ARVADOS_API_HOST={{ site.arvados_api_host }} ARVADOS_API_TOKEN=1234567890qwertyuiopasdfghjklzxcvbnm1234567890zzzz -In these examples, @x1u39-tpzed-3kz0nwtjehhl0u4@ is the sample user account. Replace with the uuid of the user you wish to manipulate. +In these examples, @zzzzz-tpzed-3kz0nwtjehhl0u4@ is the sample user account. Replace with the uuid of the user you wish to manipulate. See "user management":{{site.baseurl}}/admin/activation.html for an overview of how to use these commands. @@ -24,28 +24,68 @@ h3. Setup a user This creates a default git repository and VM login. Enables user to self-activate using Workbench. -
-arv user setup --uuid x1u39-tpzed-3kz0nwtjehhl0u4
-
+ +
$ arv user setup --uuid zzzzz-tpzed-3kz0nwtjehhl0u4
+
+
+ h3. Deactivate user -
-arv user unsetup --uuid x1u39-tpzed-3kz0nwtjehhl0u4
-
+ +
$ arv user unsetup --uuid zzzzz-tpzed-3kz0nwtjehhl0u4
+
+
+ When deactivating a user, you may also want to "reassign ownership of their data":{{site.baseurl}}/admin/reassign-ownership.html . h3. Directly activate user -
-arv user update --uuid "x1u39-tpzed-3kz0nwtjehhl0u4" --user '{"is_active":true}'
-
+ +
$ arv user update --uuid "zzzzz-tpzed-3kz0nwtjehhl0u4" --user '{"is_active":true}'
+
+
+ +Note: this bypasses user agreement checks, and does not set up the user with a default git repository or VM login. -Note this bypasses user agreements checks, and does not set up the user with a default git repository or VM login. +h3(#create-token). Create a token for a user +As an admin, you can create tokens for other users. + + +
$ arv api_client_authorization create --api-client-authorization '{"owner_uuid": "zzzzz-tpzed-fr97h9t4m5jffxs"}'
+{
+ "href":"/api_client_authorizations/zzzzz-gj3su-yyyyyyyyyyyyyyy",
+ "kind":"arvados#apiClientAuthorization",
+ "etag":"9yk144t0v6cvyp0342exoh2vq",
+ "uuid":"zzzzz-gj3su-yyyyyyyyyyyyyyy",
+ "owner_uuid":"zzzzz-tpzed-fr97h9t4m5jffxs",
+ "created_at":"2020-03-12T20:36:12.517375422Z",
+ "modified_by_client_uuid":null,
+ "modified_by_user_uuid":null,
+ "modified_at":null,
+ "user_id":3,
+ "api_client_id":7,
+ "api_token":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+ "created_by_ip_address":null,
+ "default_owner_uuid":null,
+ "expires_at":null,
+ "last_used_at":null,
+ "last_used_by_ip_address":null,
+ "scopes":["all"]
+}
+
+
+ + +To get the token string, combine the values of @uuid@ and @api_token@ in the form "v2/$uuid/$api_token". In this example the string that goes in @ARVADOS_API_TOKEN@ would be: + +
+ARVADOS_API_TOKEN=v2/zzzzz-gj3su-yyyyyyyyyyyyyyy/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+
-h2. Permissions +h2. Adding Permissions h3. VM login diff --git a/doc/architecture/federation.html.textile.liquid b/doc/architecture/federation.html.textile.liquid index e2b80de707..2ec2c93fb4 100644 --- a/doc/architecture/federation.html.textile.liquid +++ b/doc/architecture/federation.html.textile.liquid @@ -26,7 +26,7 @@ Clusters are identified by a five-digit alphanumeric id (numbers and lowercase l Cluster identifiers are mapped API server hosts one of two ways: -* Through DNS resolution, under the @arvadosapi.com@ domain. For example, the API server for the cluster @qr1hi@ can be found at @qr1hi.arvadosapi.com@. To register a cluster id for free under @arvadosapi.com@, contact "info@curii.com":mailto:info@curii.com +* Through DNS resolution, under the @arvadosapi.com@ domain. For example, the API server for the cluster @pirca@ can be found at @pirca.arvadosapi.com@. To register a cluster id for free under @arvadosapi.com@, contact "info@curii.com":mailto:info@curii.com * Through explicit configuration: The @RemoteClusters@ section of @/etc/arvados/config.yml@ (for arvados-controller) diff --git a/doc/examples/pipeline_templates/gatk-exome-fq-snp.json b/doc/examples/pipeline_templates/gatk-exome-fq-snp.json deleted file mode 100644 index 481dda38ca..0000000000 --- a/doc/examples/pipeline_templates/gatk-exome-fq-snp.json +++ /dev/null @@ -1,175 +0,0 @@ -{ - "name":"GATK / exome PE fastq to snp", - "components":{ - "extract-reference":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"file-select", - "script_parameters":{ - "names":[ - "human_g1k_v37.fasta.gz", - "human_g1k_v37.fasta.fai.gz", - "human_g1k_v37.dict.gz" - ], - "input":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi" - }, - "output_name":false - }, - "bwa-index":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"bwa-index", - "script_parameters":{ - "input":{ - "output_of":"extract-reference" - 
}, - "bwa_tbz":{ - "value":"8b6e2c4916133e1d859c9e812861ce13+70", - "required":true - } - }, - "output_name":false - }, - "bwa-aln":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"bwa-aln", - "script_parameters":{ - "input":{ - "dataclass":"Collection", - "required":"true" - }, - "reference_index":{ - "output_of":"bwa-index" - }, - "samtools_tgz":{ - "value":"c777e23cf13e5d5906abfdc08d84bfdb+74", - "required":true - }, - "bwa_tbz":{ - "value":"8b6e2c4916133e1d859c9e812861ce13+70", - "required":true - } - }, - "runtime_constraints":{ - "max_tasks_per_node":1 - }, - "output_name":false - }, - "picard-gatk2-prep":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"picard-gatk2-prep", - "script_parameters":{ - "input":{ - "output_of":"bwa-aln" - }, - "reference":{ - "output_of":"extract-reference" - }, - "picard_zip":{ - "value":"687f74675c6a0e925dec619cc2bec25f+77", - "required":true - } - }, - "runtime_constraints":{ - "max_tasks_per_node":1 - }, - "output_name":false - }, - "GATK2-realign":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"GATK2-realign", - "script_parameters":{ - "input":{ - "output_of":"picard-gatk2-prep" - }, - "gatk_bundle":{ - "value":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi", - "required":true - }, - "picard_zip":{ - "value":"687f74675c6a0e925dec619cc2bec25f+77", - "required":true - }, - "gatk_tbz":{ - "value":"7e0a277d6d2353678a11f56bab3b13f2+87", - "required":true - }, - "regions":{ - "value":"13b53dbe1ec032dfc495fd974aa5dd4a+87/S02972011_Covered_sort_merged.bed" - }, - "region_padding":{ - "value":10 - } - }, - "runtime_constraints":{ - "max_tasks_per_node":2 - }, - "output_name":false - }, - "GATK2-bqsr":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"GATK2-bqsr", - "script_parameters":{ - "input":{ - 
"output_of":"GATK2-realign" - }, - "gatk_bundle":{ - "value":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi", - "required":true - }, - "picard_zip":{ - "value":"687f74675c6a0e925dec619cc2bec25f+77", - "required":true - }, - "gatk_tbz":{ - "value":"7e0a277d6d2353678a11f56bab3b13f2+87", - "required":true - } - }, - "output_name":false - }, - "GATK2-merge-call":{ - "repository":"arvados", - "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153", - "script":"GATK2-merge-call", - "script_parameters":{ - "input":{ - "output_of":"GATK2-bqsr" - }, - "gatk_bundle":{ - "value":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi", - "required":true - }, - "picard_zip":{ - "value":"687f74675c6a0e925dec619cc2bec25f+77", - "required":true - }, - "gatk_tbz":{ - "value":"7e0a277d6d2353678a11f56bab3b13f2+87", - "required":true - }, - "regions":{ - "value":"13b53dbe1ec032dfc495fd974aa5dd4a+87/S02972011_Covered_sort_merged.bed" - }, - "region_padding":{ - "value":10 - }, - "GATK2_UnifiedGenotyper_args":{ - "default":[ - "-stand_call_conf", - "30.0", - "-stand_emit_conf", - "30.0", - "-dcov", - "200" - ] - } - }, - "output_name":"Variant calls from UnifiedGenotyper" - } - } -} diff --git a/doc/examples/pipeline_templates/rtg-fq-snp.json b/doc/examples/pipeline_templates/rtg-fq-snp.json deleted file mode 100644 index c951c4c433..0000000000 --- a/doc/examples/pipeline_templates/rtg-fq-snp.json +++ /dev/null @@ -1,76 +0,0 @@ -{ - "name":"Real Time Genomics / PE fastq to snp", - "components":{ - "extract_reference":{ - "script":"file-select", - "script_parameters":{ - "names":[ - "human_g1k_v37.fasta.gz" - ], - "input":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi" - }, - "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2" - }, - "reformat_reference":{ - "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2", - "script":"rtg-fasta2sdf", - "script_parameters":{ - "input":{ - "output_of":"extract_reference" - }, - 
"rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi", - "rtg_license":{ - "optional":false - } - } - }, - "reformat_reads":{ - "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2", - "script":"rtg-fastq2sdf", - "script_parameters":{ - "input":{ - "optional":false - }, - "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi", - "rtg_license":{ - "optional":false - } - } - }, - "map_reads":{ - "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2", - "script":"rtg-map", - "script_parameters":{ - "input":{ - "output_of":"reformat_reads" - }, - "reference":{ - "output_of":"reformat_reference" - }, - "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi", - "rtg_license":{ - "optional":false - } - }, - "runtime_constraints":{ - "max_tasks_per_node":1 - } - }, - "report_snp":{ - "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2", - "script":"rtg-snp", - "script_parameters":{ - "input":{ - "output_of":"map_reads" - }, - "reference":{ - "output_of":"reformat_reference" - }, - "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi", - "rtg_license":{ - "optional":false - } - } - } - } -} diff --git a/doc/examples/ruby/list-active-nodes.rb b/doc/examples/ruby/list-active-nodes.rb deleted file mode 100755 index a3eb20540b..0000000000 --- a/doc/examples/ruby/list-active-nodes.rb +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env ruby -# Copyright (C) The Arvados Authors. All rights reserved. -# -# SPDX-License-Identifier: CC-BY-SA-3.0 - -abort 'Error: Ruby >= 1.9.3 required.' 
if RUBY_VERSION < '1.9.3' - -require 'arvados' - -arv = Arvados.new(api_version: 'v1') -arv.node.list[:items].each do |node| - if node[:crunch_worker_state] != 'down' - ping_age = (Time.now - Time.parse(node[:last_ping_at])).to_i rescue -1 - puts "#{node[:uuid]} #{node[:crunch_worker_state]} #{ping_age}" - end -end diff --git a/doc/install/copy_pipeline_from_curoverse.html.textile.liquid b/doc/install/copy_pipeline_from_curoverse.html.textile.liquid deleted file mode 100644 index 2c2b3c466e..0000000000 --- a/doc/install/copy_pipeline_from_curoverse.html.textile.liquid +++ /dev/null @@ -1,68 +0,0 @@ ---- -layout: default -navsection: installguide -title: Copy pipeline from the Arvados Playground -... -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -This tutorial describes how to find and copy a publicly shared pipeline from the Arvados Playground. Please note that you can use similar steps to copy any template you can access from the Arvados Playground to your cluster. - -h3. Access a public pipeline in the Arvados Playground using Workbench - -the Arvados Playground provides access to some public data, which can be used to experience Arvados in action. Let's access a public pipeline and copy it to your cluster, so that you can run it in your environment. - -Start by visiting the "*Arvados Playground public projects page*":https://playground.arvados.org/projects/public. This page lists all the publicly accessible projects in this arvados installation. Click on one of these projects to open it. We will use "*lobSTR v.3 (Public)*":https://playground.arvados.org/projects/qr1hi-j7d0g-up6qgpqz5ie2vfq as the example in this tutorial. - -Once in the "*lobSTR v.3 (Public)*":https://playground.arvados.org/projects/qr1hi-j7d0g-up6qgpqz5ie2vfq project, click on the *Pipeline templates* tab. In the pipeline templates tab, you will see a template named *lobSTR v.3*. 
Click on the *Show* button to the left of this name. This will take to you to the "*lobSTR v.3*":https://playground.arvados.org/pipeline_templates/qr1hi-p5p6p-9pkaxt6qjnkxhhu template page. - -Once in this page, you can take the *uuid* of this template from the address bar, which is *qr1hi-p5p6p-9pkaxt6qjnkxhhu*. Next, we will copy this template to your Arvados instance. - -h3. Copying a pipeline template from the Arvados Playground to your cluster - -As described above, navigate to the publicly shared pipeline template "*lobSTR v.3*":https://playground.arvados.org/pipeline_templates/qr1hi-p5p6p-9pkaxt6qjnkxhhu on the Arvados Playground. We will now copy this template with uuid *qr1hi-p5p6p-9pkaxt6qjnkxhhu* to your cluster. - -{% include 'tutorial_expectations' %} - -We will use the Arvados *arv-copy* command to copy this template to your cluster. In order to use arv-copy, first you need to setup the source and destination cluster configuration files. Here, *qr1hi* would be the source cluster and your Arvados instance would be the *dst_cluster*. - -During this setup, if you have an account in the Arvados Playground, you can use "your access token":#using-your-token to create the source configuration file. If you do not have an account in the Arvados Playground, you can use the "anonymous access token":#using-anonymous-token for the source cluster configuration. - -h4(#using-anonymous-token). *Configuring source and destination setup files using anonymous access token* - -Configure the source and destination clusters as described in the "*Using arv-copy*":http://doc.arvados.org/user/topics/arv-copy.html tutorial in user guide, while using *5vqmz9mik2ou2k9objb8pnyce8t97p6vocyaouqo3qalvpmjs5* as the API token for source configuration. - - -
~$ cd ~/.config/arvados
-~$ echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf
-~$ echo "ARVADOS_API_TOKEN=5vqmz9mik2ou2k9objb8pnyce8t97p6vocyaouqo3qalvpmjs5" >> qr1hi.conf
-
-
- -You can now copy the pipeline template from *qr1hi* to *your cluster*. Replace *dst_cluster* with the *ClusterID* of your cluster. - - -
~$  arv-copy --no-recursive --src qr1hi --dst dst_cluster qr1hi-p5p6p-9pkaxt6qjnkxhhu
-
-
- -*Note:* When you are using anonymous access token to copy the template, you will not be able to do a recursive copy since you will not be able to provide the dst-git-repo parameter. In order to perform a recursive copy of the template, you would need to use the Arvados API token from your account as explained in the "using your token":#using-your-token section below. - -h4(#using-your-token). *Configuring source and destination setup files using personal access token* - -If you already have an account in the Arvados Playground, you can follow the instructions in the "*Using arv-copy*":http://doc.arvados.org/user/topics/arv-copy.html user guide to get your *Current token* for source and destination clusters, and use them to create the source *qr1hi.conf* and dst_cluster.conf configuration files. - -You can now copy the pipeline template from *qr1hi* to *your cluster* with or without recursion. Replace *dst_cluster* with the *ClusterID* of your cluster. - -*Non-recursive copy:* - -
~$  arv-copy --no-recursive --src qr1hi --dst dst_cluster qr1hi-p5p6p-9pkaxt6qjnkxhhu
-
- -*Recursive copy:* - -
~$ arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu
-
diff --git a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid index 23da428b39..cdecc88152 100644 --- a/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid +++ b/doc/install/crunch2-cloud/install-compute-node.html.textile.liquid @@ -92,8 +92,6 @@ Options: Azure secrets file which will be sourced from this script --azure-resource-group (default: false, required if building for Azure) Azure resource group - --azure-storage-account (default: false, required if building for Azure) - Azure storage account --azure-location (default: false, required if building for Azure) Azure location, e.g. centralus, eastus, westeurope --azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS) @@ -117,7 +115,6 @@ h2(#azure). Build an Azure image
~$ ./build.sh --json-file arvados-images-azure.json \
            --arvados-cluster-id ClusterID \
            --azure-resource-group ResourceGroup \
-           --azure-storage-account StorageAccount \
            --azure-location AzureRegion \
            --azure-sku AzureSKU \
            --azure-secrets-file AzureSecretsFilePath \
@@ -126,7 +123,7 @@ h2(#azure). Build an Azure image
 
 
-For @ClusterID@, fill in your cluster ID. The @ResourceGroup@, @StorageAccount@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04. +For @ClusterID@, fill in your cluster ID. The @ResourceGroup@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04. @AzureSecretsFilePath@ should be replaced with the path to a shell script that loads the Azure secrets with sufficient permissions to create the image. The file would look like this: diff --git a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid index faa7c5b953..6841778470 100644 --- a/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid +++ b/doc/install/crunch2-cloud/install-dispatch-cloud.html.textile.liquid @@ -93,6 +93,77 @@ h4. Minimal configuration example for Amazon EC2 h4. Minimal configuration example for Azure +Using managed disks: + + +
    Containers:
+      CloudVMs:
+        ImageID: "zzzzz-compute-v1597349873"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored. It only needs
+          # to be specified if it is different from ResourceGroup.
+          ImageResourceGroup: aaaaa
+
+
+
+ +Azure recommends using managed images. If you plan to start more than 20 VMs simultaneously, Azure recommends using a shared image gallery instead to avoid slowdowns and timeouts during the creation of the VMs. + +Using an image from a shared image gallery: + + +
    Containers:
+      CloudVMs:
+        ImageID: "shared_image_gallery_image_definition_name"
+        Driver: azure
+        DriverParameters:
+          # Credentials.
+          SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+          ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+          TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+          # Data center where VMs will be allocated
+          Location: centralus
+
+          # The resource group where the VM and virtual NIC will be
+          # created.
+          ResourceGroup: zzzzz
+          NetworkResourceGroup: yyyyy   # only if different from ResourceGroup
+          Network: xxxxx
+          Subnet: xxxxx-subnet-private
+
+          # The resource group where the disk image is stored. It only needs
+          # to be specified if it is different from ResourceGroup.
+          ImageResourceGroup: aaaaa
+
+          # (azure) shared image gallery: the name of the gallery
+          SharedImageGalleryName: "shared_image_gallery_1"
+          # (azure) shared image gallery: the version of the image definition
+          SharedImageGalleryImageVersion: "0.0.1"
+
+
+
+ +Using unmanaged disks (deprecated): +
    Containers:
       CloudVMs:
diff --git a/doc/sdk/java-v2/example.html.textile.liquid b/doc/sdk/java-v2/example.html.textile.liquid
index e73f968c8d..8d2fc2f4af 100644
--- a/doc/sdk/java-v2/example.html.textile.liquid
+++ b/doc/sdk/java-v2/example.html.textile.liquid
@@ -28,7 +28,7 @@ public class CollectionExample {
     public static void main(String[] argv) {
 	ConfigProvider conf = ExternalConfigProvider.builder().
 	    apiProtocol("https").
-	    apiHost("qr1hi.arvadosapi.com").
+	    apiHost("zzzzz.arvadosapi.com").
 	    apiPort(443).
 	    apiToken("...").
 	    build();
diff --git a/doc/sdk/python/cookbook.html.textile.liquid b/doc/sdk/python/cookbook.html.textile.liquid
index bd7f64b33d..82741c3ea6 100644
--- a/doc/sdk/python/cookbook.html.textile.liquid
+++ b/doc/sdk/python/cookbook.html.textile.liquid
@@ -47,7 +47,7 @@ h2. Get input of a CWL workflow
 {% codeblock as python %}
 import arvados
 api = arvados.api()
-container_request_uuid="qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+container_request_uuid="zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 container_request = api.container_requests().get(uuid=container_request_uuid).execute()
 print(container_request["mounts"]["/var/lib/cwl/cwl.input.json"])
 {% endcodeblock %}
@@ -58,7 +58,7 @@ h2. Get output of a CWL workflow
 import arvados
 import arvados.collection
 api = arvados.api()
-container_request_uuid="qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+container_request_uuid="zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 container_request = api.container_requests().get(uuid=container_request_uuid).execute()
 collection = arvados.collection.CollectionReader(container_request["output_uuid"])
 print(collection.open("cwl.output.json").read())
@@ -81,7 +81,7 @@ def get_cr_state(cr_uuid):
             return 'On hold'
         else:
             return 'Queued'
-    elif c['state'] == 'Complete' and c['exit_code'] != 0
+    elif c['state'] == 'Complete' and c['exit_code'] != 0:
         return 'Failed'
     elif c['state'] == 'Running':
         if c['runtime_status'].get('error', None):
@@ -89,7 +89,7 @@ def get_cr_state(cr_uuid):
         elif c['runtime_status'].get('warning', None):
             return 'Warning'
     return c['state']
-container_request_uuid = 'qr1hi-xvhdp-zzzzzzzzzzzzzzz'
+container_request_uuid = 'zzzzz-xvhdp-zzzzzzzzzzzzzzz'
 print(get_cr_state(container_request_uuid))
 {% endcodeblock %}
 
@@ -98,7 +98,7 @@ h2. List input of child requests
 {% codeblock as python %}
 import arvados
 api = arvados.api()
-parent_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 namefilter = "bwa%"  # the "like" filter uses SQL pattern match syntax
 container_request = api.container_requests().get(uuid=parent_request_uuid).execute()
 parent_container_uuid = container_request["container_uuid"]
@@ -117,7 +117,7 @@ h2. List output of child requests
 {% codeblock as python %}
 import arvados
 api = arvados.api()
-parent_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 namefilter = "bwa%"  # the "like" filter uses SQL pattern match syntax
 container_request = api.container_requests().get(uuid=parent_request_uuid).execute()
 parent_container_uuid = container_request["container_uuid"]
@@ -136,7 +136,7 @@ h2. List failed child requests
 {% codeblock as python %}
 import arvados
 api = arvados.api()
-parent_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 container_request = api.container_requests().get(uuid=parent_request_uuid).execute()
 parent_container_uuid = container_request["container_uuid"]
 child_requests = api.container_requests().list(filters=[
@@ -144,7 +144,7 @@ child_requests = api.container_requests().list(filters=[
 child_containers = {c["container_uuid"]: c for c in child_requests["items"]}
 cancelled_child_containers = api.containers().list(filters=[
     ["exit_code", "!=", "0"],
-    ["uuid", "in", child_containers.keys()]], limit=1000).execute()
+    ["uuid", "in", list(child_containers.keys())]], limit=1000).execute()
 for c in cancelled_child_containers["items"]:
     print("%s (%s)" % (child_containers[c["uuid"]]["name"], child_containers[c["uuid"]]["uuid"]))
 {% endcodeblock %}
@@ -155,11 +155,12 @@ h2. Get log of a child request
 import arvados
 import arvados.collection
 api = arvados.api()
-container_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+container_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
 container_request = api.container_requests().get(uuid=container_request_uuid).execute()
 collection = arvados.collection.CollectionReader(container_request["log_uuid"])
 for c in collection:
-    print(collection.open(c).read())
+    if isinstance(collection.find(c), arvados.arvfile.ArvadosFile):
+        print(collection.open(c).read())
 {% endcodeblock %}
 
 h2(#sharing_link). Create a collection sharing link
@@ -168,7 +169,7 @@ h2(#sharing_link). Create a collection sharing link
 import arvados
 api = arvados.api()
 download="https://your.download.server"
-collection_uuid="qr1hi-4zz18-zzzzzzzzzzzzzzz"
+collection_uuid="zzzzz-4zz18-zzzzzzzzzzzzzzz"
 token = api.api_client_authorizations().create(body={"api_client_authorization":{"scopes": [
     "GET /arvados/v1/collections/%s" % collection_uuid,
     "GET /arvados/v1/collections/%s/" % collection_uuid,
@@ -184,8 +185,8 @@ Note, if two collections have files of the same name, the contents will be conca
 import arvados
 import arvados.collection
 api = arvados.api()
-project_uuid = "qr1hi-tpzed-zzzzzzzzzzzzzzz"
-collection_uuids = ["qr1hi-4zz18-aaaaaaaaaaaaaaa", "qr1hi-4zz18-bbbbbbbbbbbbbbb"]
+project_uuid = "zzzzz-tpzed-zzzzzzzzzzzzzzz"
+collection_uuids = ["zzzzz-4zz18-aaaaaaaaaaaaaaa", "zzzzz-4zz18-bbbbbbbbbbbbbbb"]
 combined_manifest = ""
 for u in collection_uuids:
     c = api.collections().get(uuid=u).execute()
@@ -200,7 +201,7 @@ h2. Upload a file into a new collection
 import arvados
 import arvados.collection
 
-project_uuid = "qr1hi-j7d0g-zzzzzzzzzzzzzzz"
+project_uuid = "zzzzz-j7d0g-zzzzzzzzzzzzzzz"
 collection_name = "My collection"
 filename = "file1.txt"
 
@@ -222,7 +223,7 @@ h2. Download a file from a collection
 import arvados
 import arvados.collection
 
-collection_uuid = "qr1hi-4zz18-zzzzzzzzzzzzzzz"
+collection_uuid = "zzzzz-4zz18-zzzzzzzzzzzzzzz"
 filename = "file1.txt"
 
 api = arvados.api()
diff --git a/doc/sdk/python/events.html.textile.liquid b/doc/sdk/python/events.html.textile.liquid
index afbec20d95..302af20c83 100644
--- a/doc/sdk/python/events.html.textile.liquid
+++ b/doc/sdk/python/events.html.textile.liquid
@@ -2,7 +2,7 @@
 layout: default
 navsection: sdk
 navmenu: Python
-title: Subscribing to events
+title: Subscribing to database events
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
diff --git a/doc/sdk/ruby/example.html.textile.liquid b/doc/sdk/ruby/example.html.textile.liquid
index b8c0dcbb80..f2ea1c09df 100644
--- a/doc/sdk/ruby/example.html.textile.liquid
+++ b/doc/sdk/ruby/example.html.textile.liquid
@@ -55,7 +55,7 @@ first_repo = repos[:items][0]
 puts "UUID of first repo returned is #{first_repo[:uuid]}"
 {% endcodeblock %}
 
-UUID of first repo returned is qr1hi-s0uqq-b1bnybpx3u5temz
+UUID of first repo returned is zzzzz-s0uqq-b1bnybpx3u5temz
 
 h2. update
 
diff --git a/doc/start/getting_started/firstpipeline.html.textile.liquid b/doc/start/getting_started/firstpipeline.html.textile.liquid
deleted file mode 100644
index 43369a3bbf..0000000000
--- a/doc/start/getting_started/firstpipeline.html.textile.liquid
+++ /dev/null
@@ -1,94 +0,0 @@
----
-layout: default
-navsection: start 
-title: Run your first pipeline in minutes
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-h2. LobSTR v3 
-
-In this quickstart guide, we'll run an existing pipeline with pre-existing data. Step-by-step instructions are shown below. You can follow along using your own local install or by using the Arvados Playground (any Google account can be used to log in).
-
-(For more information about this pipeline, see our detailed lobSTR guide).
-
-
-
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
diff --git a/doc/start/getting_started/nextsteps.html.textile.liquid b/doc/start/getting_started/nextsteps.html.textile.liquid
deleted file mode 100644
index dd059ea8d4..0000000000
--- a/doc/start/getting_started/nextsteps.html.textile.liquid
+++ /dev/null
@@ -1,12 +0,0 @@
----
-layout: default
-navsection: start 
-title: Check out the User Guide 
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-Now that you've finished the Getting Started guide, check out the "User Guide":{{site.baseurl}}/user/index.html. The User Guide goes into more depth than the Getting Started guide, covers how to develop your own pipelines in addition to using pre-existing pipelines, covers the Arvados command line tools in addition to the Workbench graphical interface to Arvados, and can be referenced in any order.
diff --git a/doc/start/getting_started/publicproject.html.textile.liquid b/doc/start/getting_started/publicproject.html.textile.liquid
deleted file mode 100644
index 0fabad7aa7..0000000000
--- a/doc/start/getting_started/publicproject.html.textile.liquid
+++ /dev/null
@@ -1,133 +0,0 @@
----
-layout: default
-navsection: start
-title: Visit an Arvados Public Project
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-h2. Mason Lab - Pathomap / Ancestry Mapper (Public)
-
-You can see Arvados in action by accessing the Mason Lab - Pathomap / Ancestry Mapper (Public) project. By visiting this project, you can see what an Arvados project is, access data collections in this project, and click through a pipeline instance's contents.
-
-You will be accessing this project in read-only mode and will not be able to make any modifications such as running a new pipeline instance.
-
-
-
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
diff --git a/doc/start/getting_started/sharedata.html.textile.liquid b/doc/start/getting_started/sharedata.html.textile.liquid
deleted file mode 100644
index 02e0b70329..0000000000
--- a/doc/start/getting_started/sharedata.html.textile.liquid
+++ /dev/null
@@ -1,102 +0,0 @@
----
-layout: default
-navsection: start 
-title: Sharing Data 
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-You can easily share data entirely through Workbench, the web interface to Arvados.
-
-h2. Upload and share your existing data
-
-Step-by-step instructions are shown below.
-
-
-
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
diff --git a/doc/start/index.html.textile.liquid b/doc/start/index.html.textile.liquid
deleted file mode 100644
index cddfb8e441..0000000000
--- a/doc/start/index.html.textile.liquid
+++ /dev/null
@@ -1,133 +0,0 @@
----
-layout: default
-navsection: start 
-title: Welcome to Arvados!
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-This guide provides an introduction to using Arvados to solve big data bioinformatics problems.
-
-h2. What is Arvados?
-
-Arvados is a free and open source bioinformatics platform for genomic and biomedical data.
-
-We address the needs of IT directors, lab principals, and bioinformaticians.
-
-h2. Why use Arvados?
-
-Arvados enables you to quickly begin using cloud computing resources in your bioinformatics work. It allows you to track your methods and datasets, share them securely, and easily re-run analyses.
-
-h3. Take a look (Screenshots gallery) 
-
-
-
-Note: Workbench is the web interface to Arvados.
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
-
-h3. Key Features
-
-
    -
  • Track your methods
    -We log every compute job: software versions, machine images, input and output data hashes. Rely on a computer, not your memory and your note-taking skills.

  • -
  • Share your methods
    -Show other people what you did. Let them use your workflow on their own data. Publish a permalink to your methods and data, so others can reproduce and build on them easily.

  • -
  • Track data origin
    -Did you really only use fully consented public data in this analysis?

  • -
  • Get results sooner
    -Run your compute jobs faster by using multi-nodes and multi-cores, even if your programs are single-threaded.

  • -
diff --git a/doc/user/cwl/bwa-mem/bwa-mem-input-mixed.yml b/doc/user/cwl/bwa-mem/bwa-mem-input-mixed.yml index 73bd9f599c..73dd65c463 100755 --- a/doc/user/cwl/bwa-mem/bwa-mem-input-mixed.yml +++ b/doc/user/cwl/bwa-mem/bwa-mem-input-mixed.yml @@ -15,15 +15,15 @@ cwl:tool: bwa-mem.cwl reference: class: File location: keep:2463fa9efeb75e099685528b3b9071e0+438/19.fasta.bwt - arv:collectionUUID: qr1hi-4zz18-pwid4w22a40jp8l + arv:collectionUUID: jutro-4zz18-tv416l321i4r01e read_p1: class: File location: keep:ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_1.fastq - arv:collectionUUID: qr1hi-4zz18-h615rgfmqt3wje0 + arv:collectionUUID: jutro-4zz18-8k5hsvee0izv2g3 read_p2: class: File location: keep:ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_2.fastq - arv:collectionUUID: qr1hi-4zz18-h615rgfmqt3wje0 + arv:collectionUUID: jutro-4zz18-8k5hsvee0izv2g3 group_id: arvados_tutorial sample_id: HWI-ST1027_129 PL: illumina diff --git a/doc/user/cwl/bwa-mem/bwa-mem-input-uuids.yml b/doc/user/cwl/bwa-mem/bwa-mem-input-uuids.yml index 7e71e959eb..e76aa78173 100755 --- a/doc/user/cwl/bwa-mem/bwa-mem-input-uuids.yml +++ b/doc/user/cwl/bwa-mem/bwa-mem-input-uuids.yml @@ -9,13 +9,13 @@ cwl:tool: bwa-mem.cwl reference: class: File - location: keep:qr1hi-4zz18-pwid4w22a40jp8l/19.fasta.bwt + location: keep:jutro-4zz18-tv416l321i4r01e/19.fasta.bwt read_p1: class: File - location: keep:qr1hi-4zz18-h615rgfmqt3wje0/HWI-ST1027_129_D0THKACXX.1_1.fastq + location: keep:jutro-4zz18-8k5hsvee0izv2g3/HWI-ST1027_129_D0THKACXX.1_1.fastq read_p2: class: File - location: keep:qr1hi-4zz18-h615rgfmqt3wje0/HWI-ST1027_129_D0THKACXX.1_2.fastq + location: keep:jutro-4zz18-8k5hsvee0izv2g3/HWI-ST1027_129_D0THKACXX.1_2.fastq group_id: arvados_tutorial sample_id: HWI-ST1027_129 PL: illumina diff --git a/doc/user/cwl/bwa-mem/bwa-mem.cwl b/doc/user/cwl/bwa-mem/bwa-mem.cwl index 2001971264..018867c83e 100755 --- a/doc/user/cwl/bwa-mem/bwa-mem.cwl +++ b/doc/user/cwl/bwa-mem/bwa-mem.cwl 
@@ -8,13 +8,13 @@ class: CommandLineTool hints: DockerRequirement: - dockerPull: lh3lh3/bwa + dockerPull: quay.io/biocontainers/bwa:0.7.17--ha92aebf_3 -baseCommand: [mem] +baseCommand: [bwa, mem] arguments: - {prefix: "-t", valueFrom: $(runtime.cores)} - - {prefix: "-R", valueFrom: "@RG\tID:$(inputs.group_id)\tPL:$(inputs.PL)\tSM:$(inputs.sample_id)"} + - {prefix: "-R", valueFrom: '@RG\\\tID:$(inputs.group_id)\\\tPL:$(inputs.PL)\\\tSM:$(inputs.sample_id)'} inputs: reference: diff --git a/doc/user/cwl/cwl-run-options.html.textile.liquid b/doc/user/cwl/cwl-run-options.html.textile.liquid index 725528f44d..761d198ee4 100644 --- a/doc/user/cwl/cwl-run-options.html.textile.liquid +++ b/doc/user/cwl/cwl-run-options.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Using arvados-cwl-runner" +title: "arvados-cwl-runner options" ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -74,10 +74,10 @@ Use the @--name@ and @--output-name@ options to specify the name of the workflow
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --name "Example bwa run" --output-name "Example bwa output" bwa-mem.cwl bwa-mem-input.yml
 arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
 2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
 2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
 {
     "aligned_sam": {
@@ -98,9 +98,9 @@ To submit a workflow and exit immediately, use the @--no-wait@ option.  This wil
 
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml
 arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
 2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to qr1hi-4zz18-eqnfwrow8aysa9q
-2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-qr1hi-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to zzzzz-4zz18-eqnfwrow8aysa9q
+2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+zzzzz-8i9sb-fm2n3b1w0l6bskg
 
@@ -111,10 +111,10 @@ To run a workflow with local control, use @--local@. This means that the host w
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml
 arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance qr1hi-d1hrv-92wcu6ldtio74r4
-2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Queued
-2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Running
-2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Complete
+2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance zzzzz-d1hrv-92wcu6ldtio74r4
+2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-2nzzfbuf9zjrj4g) is Queued
+2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-2nzzfbuf9zjrj4g) is Running
+2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-2nzzfbuf9zjrj4g) is Complete
 2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
 {
     "aligned_sam": {
diff --git a/doc/user/cwl/cwl-runner.html.textile.liquid b/doc/user/cwl/cwl-runner.html.textile.liquid
index 2be803b52a..442a60b04f 100644
--- a/doc/user/cwl/cwl-runner.html.textile.liquid
+++ b/doc/user/cwl/cwl-runner.html.textile.liquid
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: userguide
-title: "Running an Arvados workflow"
+title: "Starting a Workflow at the Command Line"
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -13,44 +13,38 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 {% include 'tutorial_expectations' %}
 
-{% include 'notebox_begin' %}
-
-By default, the @arvados-cwl-runner@ is installed on Arvados shell nodes.  If you want to submit jobs from somewhere else, such as your workstation, you may install "arvados-cwl-runner.":#setup
-
-{% include 'notebox_end' %}
-
 This tutorial will demonstrate how to submit a workflow at the command line using @arvados-cwl-runner@.
 
-h2. Running arvados-cwl-runner
+# "Get the tutorial files":#get-files
+# "Submitting a workflow to an Arvados cluster":#submitting
+# "Registering a workflow to use in Workbench":#registering
+# "Make a workflow file directly executable":#executable
 
-h3. Get the example files
+h2(#get-files). Get the tutorial files
 
-The tutorial files are located in the "documentation section of the Arvados source repository:":https://github.com/arvados/arvados/tree/master/doc/user/cwl/bwa-mem
+The tutorial files are located in the documentation section of the Arvados source repository, which can be found on "git.arvados.org":https://git.arvados.org/arvados.git/tree/HEAD:/doc/user/cwl/bwa-mem or "GitHub":https://github.com/arvados/arvados/tree/master/doc/user/cwl/bwa-mem
 
 
-
~$ git clone https://github.com/arvados/arvados
+
~$ git clone https://git.arvados.org/arvados.git
 ~$ cd arvados/doc/user/cwl/bwa-mem
 
-The tutorial data is hosted on "https://playground.arvados.org":https://playground.arvados.org (also referred to by the identifier *qr1hi*). If you are using a different Arvados instance, you may need to copy the data to your own instance. The easiest way to do this is with "arv-copy":{{site.baseurl}}/user/topics/arv-copy.html (this requires signing up for a free playground.arvados.org account). +The tutorial data is hosted on "https://playground.arvados.org":https://playground.arvados.org (also referred to by the identifier *pirca*). If you are using a different Arvados instance, you may need to copy the data to your own instance. One way to do this is with "arv-copy":{{site.baseurl}}/user/topics/arv-copy.html (this requires signing up for a free playground.arvados.org account). -
~$ arv-copy --src qr1hi --dst settings 2463fa9efeb75e099685528b3b9071e0+438
-~$ arv-copy --src qr1hi --dst settings ae480c5099b81e17267b7445e35b4bc7+180
-~$ arv-copy --src qr1hi --dst settings 655c6cd07550151b210961ed1d3852cf+57
+
~$ arv-copy --src pirca --dst settings 2463fa9efeb75e099685528b3b9071e0+438
+~$ arv-copy --src pirca --dst settings ae480c5099b81e17267b7445e35b4bc7+180
 
If you do not wish to create an account on "https://playground.arvados.org":https://playground.arvados.org, you may download the files anonymously and upload them to your local Arvados instance: -"https://playground.arvados.org/collections/2463fa9efeb75e099685528b3b9071e0+438":https://playground.arvados.org/collections/2463fa9efeb75e099685528b3b9071e0+438 - -"https://playground.arvados.org/collections/ae480c5099b81e17267b7445e35b4bc7+180":https://playground.arvados.org/collections/ae480c5099b81e17267b7445e35b4bc7+180 +"https://collections.pirca.arvadosapi.com/c=2463fa9efeb75e099685528b3b9071e0+438/":https://collections.pirca.arvadosapi.com/c=2463fa9efeb75e099685528b3b9071e0+438/ -"https://playground.arvados.org/collections/655c6cd07550151b210961ed1d3852cf+57":https://playground.arvados.org/collections/655c6cd07550151b210961ed1d3852cf+57 +"https://collections.pirca.arvadosapi.com/c=ae480c5099b81e17267b7445e35b4bc7+180/":https://collections.pirca.arvadosapi.com/c=ae480c5099b81e17267b7445e35b4bc7+180/ -h2. Submitting a workflow to an Arvados cluster +h2(#submitting). Submitting a workflow to an Arvados cluster h3. Submit a workflow and wait for results @@ -62,10 +56,10 @@ Use @arvados-cwl-runner@ to submit CWL workflows to Arvados. After submitting t
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner bwa-mem.cwl bwa-mem-input.yml
 arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
 2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
 2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
 {
     "aligned_sam": {
@@ -88,15 +82,19 @@ If you reference a file in "arv-mount":{{site.baseurl}}/user/tutorials/tutorial-
 
 If you reference a local file which is not in @arv-mount@, then @arvados-cwl-runner@ will upload the file to Keep and use the Keep URI reference from the upload.
 
-You can also execute CWL files directly from Keep:
+You can also execute CWL files that have been uploaded to Keep:
 
 
-
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner keep:655c6cd07550151b210961ed1d3852cf+57/bwa-mem.cwl bwa-mem-input.yml
+

+~/arvados/doc/user/cwl/bwa-mem$ arv-put --portable-data-hash --name "bwa-mem.cwl" bwa-mem.cwl
+2020-08-20 13:40:02 arvados.arv_put[12976] INFO: Collection saved as 'bwa-mem.cwl'
+f141fc27e7cfa7f7b6d208df5e0ee01b+59
+~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner keep:f141fc27e7cfa7f7b6d208df5e0ee01b+59/bwa-mem.cwl bwa-mem-input.yml
 arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
 2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
 {
     "aligned_sam": {
@@ -109,50 +107,128 @@ arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107,
 
+Note: uploading a workflow file to Keep is _not_ the same as registering the workflow for use in Workbench. See "Registering a workflow to use in Workbench":#registering below. + h3. Work reuse Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse. If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run. This also applies to individual workflow steps. For example, a two step workflow where the first step has run before will reuse results for first step and only execute the new second step. You can disable this behavior with @--disable-reuse@. h3. Command line options -See "Using arvados-cwl-runner":{{site.baseurl}}/user/cwl/cwl-run-options.html +See "arvados-cwl-runner options":{{site.baseurl}}/user/cwl/cwl-run-options.html -h2(#setup). Setting up arvados-cwl-runner +h2(#registering). Registering a workflow to use in Workbench -By default, the @arvados-cwl-runner@ is installed on Arvados shell nodes. If you want to submit jobs from somewhere else, such as your workstation, you may install @arvados-cwl-runner@ using @pip@: +Use @--create-workflow@ to register a CWL workflow with Arvados. This enables you to share workflows with other Arvados users, and run them by clicking the Run a process... button on the Workbench Dashboard and on the command line by UUID. -
~$ virtualenv ~/venv
-~$ . ~/venv/bin/activate
-~$ pip install -U setuptools
-~$ pip install arvados-cwl-runner
+
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --create-workflow bwa-mem.cwl
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to zzzzz-4zz18-7e0hedrmkuyoei3
+2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template zzzzz-p5p6p-rjleou1dwr167v5
+zzzzz-p5p6p-rjleou1dwr167v5
 
-h3. Check Docker access +You can provide a partial input file to set default values for the workflow input parameters. You can also use the @--name@ option to set the name of the workflow: -In order to pull and upload Docker images, @arvados-cwl-runner@ requires access to Docker. You do not need Docker if the Docker images you intend to use are already available in Arvados. + +
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --name "My workflow with defaults" --create-workflow bwa-mem.cwl bwa-mem-template.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to zzzzz-4zz18-0f91qkovk4ml18o
+2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template zzzzz-p5p6p-0deqe6nuuyqns2i
+zzzzz-p5p6p-0deqe6nuuyqns2i
+
+
-You can determine if you have access to Docker by running @docker version@: +h3. Running registered workflows at the command line + +You can run a registered workflow at the command line by its UUID: -
~$ docker version
-Client:
- Version:      1.9.1
- API version:  1.21
- Go version:   go1.4.2
- Git commit:   a34a1d5
- Built:        Fri Nov 20 12:59:02 UTC 2015
- OS/Arch:      linux/amd64
-
-Server:
- Version:      1.9.1
- API version:  1.21
- Go version:   go1.4.2
- Git commit:   a34a1d5
- Built:        Fri Nov 20 12:59:02 UTC 2015
- OS/Arch:      linux/amd64
+
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner pirca-7fd4e-3nqbw08vtjl8ybz --help
+INFO /home/peter/work/scripts/venv3/bin/arvados-cwl-runner 2.1.0.dev20200814195416, arvados-python-client 2.1.0.dev20200814195416, cwltool 3.0.20200807132242
+INFO Resolved 'pirca-7fd4e-3nqbw08vtjl8ybz' to 'arvwf:pirca-7fd4e-3nqbw08vtjl8ybz#main'
+usage: pirca-7fd4e-3nqbw08vtjl8ybz [-h] [--PL PL] [--group_id GROUP_ID]
+                                   [--read_p1 READ_P1] [--read_p2 READ_P2]
+                                   [--reference REFERENCE]
+                                   [--sample_id SAMPLE_ID]
+                                   [job_order]
+
+positional arguments:
+  job_order             Job input json file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --PL PL
+  --group_id GROUP_ID
+  --read_p1 READ_P1     The reads, in fastq format.
+  --read_p2 READ_P2     For mate paired reads, the second file (optional).
+  --reference REFERENCE
+                        The index files produced by `bwa index`
+  --sample_id SAMPLE_ID
 
-If this returns an error, contact the sysadmin of your cluster for assistance. +h2(#executable). Make a workflow file directly executable + +You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file: + + +
#!/usr/bin/env cwl-runner
+
+
+ + +
~/arvados/doc/user/cwl/bwa-mem$ ./bwa-mem.cwl bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File",
+        "size": 30738986
+    }
+}
+
+
+ +You can even make an input file directly executable the same way with the following two lines at the top: + + +
#!/usr/bin/env cwl-runner
+cwl:tool: bwa-mem.cwl
+
+
+ + +
~/arvados/doc/user/cwl/bwa-mem$ ./bwa-mem-input.yml
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File",
+        "size": 30738986
+    }
+}
+
+
+ +h2(#setup). Setting up arvados-cwl-runner + +See "Arvados CWL Runner":{{site.baseurl}}/sdk/python/arvados-cwl-runner.html diff --git a/doc/user/cwl/cwl-style.html.textile.liquid b/doc/user/cwl/cwl-style.html.textile.liquid index ee36014cb5..bd07161ce3 100644 --- a/doc/user/cwl/cwl-style.html.textile.liquid +++ b/doc/user/cwl/cwl-style.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: Writing Portable High-Performance Workflows +title: Guidelines for Writing High-Performance Portable Workflows ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. diff --git a/doc/user/cwl/cwl-versions.html.textile.liquid b/doc/user/cwl/cwl-versions.html.textile.liquid index 5fcfcbe3bc..ac679dc154 100644 --- a/doc/user/cwl/cwl-versions.html.textile.liquid +++ b/doc/user/cwl/cwl-versions.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: CWL version and API support +title: CWL version support ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -9,6 +9,8 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} +Arvados supports CWL v1.0, v1.1 and v1.2. + h2(#v12). Upgrading your workflows to CWL v1.2 If you are starting from a CWL v1.0 document, see "Upgrading your workflows to CWL v1.1":#v11 below. diff --git a/doc/user/getting_started/check-environment.html.textile.liquid b/doc/user/getting_started/check-environment.html.textile.liquid index b707891a1e..1097e4e9d8 100644 --- a/doc/user/getting_started/check-environment.html.textile.liquid +++ b/doc/user/getting_started/check-environment.html.textile.liquid @@ -16,14 +16,14 @@ Check that you are able to access the Arvados API server using @arv user current
$ arv user current
 {
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/users/qr1hi-xioed-9z2p3pn12yqdaem",
+ "href":"https://zzzzz.arvadosapi.com/arvados/v1/users/zzzzz-xioed-9z2p3pn12yqdaem",
  "kind":"arvados#user",
  "etag":"8u0xwb9f3otb2xx9hto4wyo03",
- "uuid":"qr1hi-tpzed-92d3kxnimy3d4e8",
- "owner_uuid":"qr1hi-tpqed-23iddeohxta2r59",
+ "uuid":"zzzzz-tpzed-92d3kxnimy3d4e8",
+ "owner_uuid":"zzzzz-tpqed-23iddeohxta2r59",
  "created_at":"2013-12-02T17:05:47Z",
- "modified_by_client_uuid":"qr1hi-xxfg8-owxa2oa2s33jyej",
- "modified_by_user_uuid":"qr1hi-tpqed-23iddeohxta2r59",
+ "modified_by_client_uuid":"zzzzz-xxfg8-owxa2oa2s33jyej",
+ "modified_by_user_uuid":"zzzzz-tpqed-23iddeohxta2r59",
  "modified_at":"2013-12-02T17:07:08Z",
  "updated_at":"2013-12-05T19:51:08Z",
  "email":"you@example.com",
diff --git a/doc/user/getting_started/ssh-access-unix.html.textile.liquid b/doc/user/getting_started/ssh-access-unix.html.textile.liquid
index 284d0a1f04..80cb391314 100644
--- a/doc/user/getting_started/ssh-access-unix.html.textile.liquid
+++ b/doc/user/getting_started/ssh-access-unix.html.textile.liquid
@@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-This document is for accessing an Arvados VM using SSH keys in Unix environments (Linux, OS X, Cygwin). If you would like to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
+This document is for accessing an Arvados VM using SSH keys in Unix-like environments (Linux, macOS, Cygwin, Windows Subsystem for Linux). If you would like to access the VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
 
 {% include 'ssh_intro' %}
 
@@ -49,7 +49,7 @@ ssh-rsa AAAAB3NzaC1ycEDoNotUseExampleKeyDoNotUseExampleKeyDoNotUseExampleKeyDoNo
 
 Now you can set up @ssh-agent@ (next) or proceed with "adding your key to the Arvados Workbench.":#workbench
 
-h3. Set up ssh-agent (recommended)
+h3. Set up ssh-agent (optional)
 
 If you find you are entering your passphrase frequently, you can use @ssh-agent@ to manage your credentials.  Use @ssh-add -l@ to test if you already have ssh-agent running:
 
@@ -80,11 +80,21 @@ When everything is set up, @ssh-add -l@ should yield output that looks something
 
 {% include 'ssh_addkey' %}
 
-h3. Connecting to the virtual machine
+h3. Connecting directly
 
-Use the following command to connect to the _shell_ VM instance as _you_.  Replace *you@shell* at the end of the following command with your *login* and *hostname* from Workbench:
+If the VM is available on the public Internet (or you are on the same private network as the VM) you can connect directly with @ssh@.  You can probably copy-and-paste the text from the *Command line* column directly into a terminal.
 
-notextile. 
$ ssh -o "ProxyCommand ssh -p2222 turnout@switchyard.{{ site.arvados_api_host }} -x -a shell" -x you@shell
+Use the following example command to connect as _you_ to the _shell.ClusterID.example.com_ VM instance. Replace *you@shell.ClusterID.example.com* at the end of the following command with your *login* and *hostname* from Workbench. + +notextile.
$ ssh you@shell.ClusterID.example.com
+ +h3. Connecting through switchyard + +Some Arvados installations use "switchyard" to isolate shell VMs from the public Internet. + +Use the following example command to connect to the _shell_ VM instance as _you_. Replace *you@shell* at the end of the following command with your *login* and *hostname* from Workbench: + +notextile.
$ ssh -o "ProxyCommand ssh -p2222 turnout@switchyard.ClusterID.example.com -x -a shell" -x you@shell
This command does several things at once. You usually cannot log in directly to virtual machines over the public Internet. Instead, you log into a "switchyard" server and then tell the switchyard which virtual machine you want to connect to. @@ -99,7 +109,7 @@ This command does several things at once. You usually cannot log in directly to You should now be able to log into the Arvados VM and "check your environment.":check-environment.html -h3. Configuration (recommended) +h4. Configuration (recommended) The command line above is cumbersome, but you can configure SSH to remember many of these settings. Add this text to the file @.ssh/config@ in your home directory (create a new file if @.ssh/config@ doesn't exist): diff --git a/doc/user/getting_started/ssh-access-windows.html.textile.liquid b/doc/user/getting_started/ssh-access-windows.html.textile.liquid index 0406e7c03b..5cbe2a3285 100644 --- a/doc/user/getting_started/ssh-access-windows.html.textile.liquid +++ b/doc/user/getting_started/ssh-access-windows.html.textile.liquid @@ -9,13 +9,13 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -This document is for accessing an Arvados VM using SSH keys in Windows environments. If you would like to use to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page. +This document is for accessing an Arvados VM using SSH keys in Windows environments using PuTTY. If you would like to use to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. 
If you are using a Unix-like environment (Linux, macOS, Cygwin, or Windows Subsystem for Linux), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page. {% include 'ssh_intro' %} h1(#gettingkey). Getting your SSH key -(Note: if you are using the SSH client that comes with "Cygwin":http://cygwin.com, please use instructions found in the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.) +(Note: If you are using the SSH client that comes with "Cygwin":http://cygwin.com or Windows Subsystem for Linux (WSL) please use instructions found in the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.) We will be using PuTTY to connect to Arvados. "PuTTY":http://www.chiark.greenend.org.uk/~sgtatham/putty/ is a free (MIT-licensed) Win32 Telnet and SSH client. PuTTY includes all the tools a Windows user needs to create private keys and make SSH connections to your virtual machines in the Arvados Cloud. @@ -57,6 +57,16 @@ Pageant is a PuTTY utility that manages your private keys so is not necessary to h3. Initial configuration +h4. Connecting directly + +# Open PuTTY from the Start Menu. +# On the Session screen set the Host Name (or IP address) to “shell.ClusterID.example.com”, which is the hostname listed in the _Virtual Machines_ page. +# On the Session screen set the Port to “22”. +# On the Connection %(rarr)→% Data screen set the Auto-login username to the username listed in the *Login name* column on the Arvados Workbench _Virtual machines_ page. +# Return to the Session screen. In the Saved Sessions box, enter a name for this configuration and click Save. + +h4. Connecting through switchyard + # Open PuTTY from the Start Menu. # On the Session screen set the Host Name (or IP address) to “shell”, which is the hostname listed in the _Virtual Machines_ page. # On the Session screen set the Port to “22”.
diff --git a/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid b/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid index 551002e55e..2aa494ae9f 100644 --- a/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid +++ b/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid @@ -15,6 +15,10 @@ h2(#webshell). Access VM using webshell Webshell gives you access to an arvados virtual machine from your browser with no additional setup. +{% include 'notebox_begin' %} +Some Arvados clusters may not have webshell set up. If you do not see a "Log in" button or "web shell" column, you will have to follow the "Unix":ssh-access-unix.html or "Windows":ssh-access-windows.html @ssh@ instructions. +{% include 'notebox_end' %} + In the Arvados Workbench, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Virtual machines* to see the list of virtual machines you can access. If you do not have access to any virtual machines, please click on Send request for shell access or send an email to "support@curoverse.com":mailto:support@curoverse.com. Each row in the Virtual Machines panel lists the hostname of the VM, along with a Log in as *you* button under the column "Web shell". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in. diff --git a/doc/user/getting_started/workbench.html.textile.liquid b/doc/user/getting_started/workbench.html.textile.liquid index fc704227e0..e8f76b6260 100644 --- a/doc/user/getting_started/workbench.html.textile.liquid +++ b/doc/user/getting_started/workbench.html.textile.liquid @@ -9,14 +9,20 @@ Copyright (C) The Arvados Authors. All rights reserved. 
SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -If you are using the default Arvados instance for this guide, you can Access Arvados Workbench using this link: +{% include 'notebox_begin' %} +This guide covers the classic Arvados Workbench web application, sometimes referred to as "Workbench 1". There is also a new Workbench web application under development called "Workbench 2". Sites which have both Workbench applications installed will have a dropdown menu option "Switch to Workbench 2" to switch between versions. + +This guide will be updated to cover "Workbench 2" in the future. +{% include 'notebox_end' %} + +If you are using the "playground" Arvados instance for this guide, you can Access Arvados Workbench using this link: {{site.arvados_workbench_host}}/ (If you are using a different Arvados instance than the default for this guide, replace *{{ site.arvados_workbench_host }}* with your private instance in all of the examples in this guide.) -You may be asked to log in using a Google account. Arvados uses only your name and email address from Google services for identification, and will never access any personal information. If you are accessing Arvados for the first time, the Workbench may indicate your account status is *New / inactive*. If this is the case, contact the administrator of the Arvados instance to request activation of your account. +You will be asked to log in. Arvados uses only your name and email address for identification, and will never access any personal information. If you are accessing Arvados for the first time, the Workbench may indicate your account status is *New / inactive*. If this is the case, contact the administrator of the Arvados instance to request activation of your account. -Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance. 
"You are now ready to run your first pipeline.":{{ site.baseurl }}/user/tutorials/tutorial-workflow-workbench.html +Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance. You are now ready to "upload data":{{ site.baseurl }}/user/tutorials/tutorial-keep.html or "run your first workflow.":{{ site.baseurl }}/user/tutorials/tutorial-workflow-workbench.html !{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/workbench-dashboard.png! diff --git a/doc/user/index.html.textile.liquid b/doc/user/index.html.textile.liquid index 909394ef47..9749d1f284 100644 --- a/doc/user/index.html.textile.liquid +++ b/doc/user/index.html.textile.liquid @@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -This guide provides a reference for using Arvados to solve scientific big data problems, including: +Arvados is an open source platform for managing, processing, and sharing genomic and other large scientific and biomedical data. This guide provides a reference for using Arvados to solve scientific big data problems, including: * Robust storage of very large files, such as whole genome sequences, using the "Arvados Keep":{{site.baseurl}}/user/tutorials/tutorial-keep.html content-addressable cluster file system. * Running compute-intensive scientific analysis pipelines, such as genomic alignment and variant calls using the "Arvados Crunch":{{site.baseurl}}/user/tutorials/intro-crunch.html cluster compute engine. diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid index 0f0e40be9c..d35df4fcec 100644 --- a/doc/user/topics/arv-copy.html.textile.liquid +++ b/doc/user/topics/arv-copy.html.textile.liquid @@ -9,103 +9,74 @@ Copyright (C) The Arvados Authors. All rights reserved. 
SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -{% include 'crunch1only_begin' %} -On those sites, the "copy a pipeline template" feature described below is not available. However, "copy a workflow" feature is not yet implemented. -{% include 'crunch1only_end' %} - This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@. {% include 'tutorial_expectations' %} h2. arv-copy -@arv-copy@ allows users to copy collections and pipeline templates from one cluster to another. By default, @arv-copy@ will recursively go through a template and copy all dependencies associated with the object. +@arv-copy@ allows users to copy collections and workflows from one cluster to another. By default, @arv-copy@ will recursively go through the workflow and copy all dependencies associated with the object. -For example, let's copy from the Arvados playground, also known as *qr1hi*, to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi. +For example, let's copy from the Arvados playground, also known as *pirca*, to *dstcl*. The names *pirca* and *dstcl* are interchangeable with any cluster id. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *zzzzz*-4zz18-tci4vn4fa95w0zx, the cluster name is *zzzzz*. -In order to communicate with both clusters, you must create custom configuration files for each cluster. In the Arvados Workbench, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*. Copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ in both of your clusters. Then, create two configuration files, one for each cluster.
The names of the files must have the format of *ClusterID.conf*. In our example, let's make two files, one for *qr1hi* and one for *dst_cluster*. From your *Current token* page in *qr1hi* and *dst_cluster*, copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@. +In order to communicate with both clusters, you must create custom configuration files for each cluster. In the Arvados Workbench, click on the dropdown menu icon in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*. Copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ in both of your clusters. Then, create two configuration files in @~/.config/arvados@, one for each cluster. The names of the files must have the format of *ClusterID.conf*. Navigate to the *Current token* page on each of *pirca* and *dstcl* to get the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@. !{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/api-token-host.png! -Copy your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ into the config files as shown below in the shell account from which you are executing the commands. For example, the default shell you may have access to is shell.qr1hi. You can add these files in ~/.config/arvados/ in the qr1hi shell terminal. +The config file consists of two lines, one for ARVADOS_API_HOST and one for ARVADOS_API_TOKEN: - -
~$ cd ~/.config/arvados
-~$ echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf
-~$ echo "ARVADOS_API_TOKEN=123456789abcdefghijkl" >> qr1hi.conf
-~$ echo "ARVADOS_API_HOST=dst_cluster.arvadosapi.com" >> dst_cluster.conf
-~$ echo "ARVADOS_API_TOKEN=987654321lkjihgfedcba" >> dst_cluster.conf
-
-
+
+ARVADOS_API_HOST=zzzzz.arvadosapi.com
+ARVADOS_API_TOKEN=v2/zzzzz-gj3su-xxxxxxxxxxxxxxx/123456789abcdefghijkl
+
+ +Copy your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ into the config files as shown below in the shell account from which you are executing the commands. In our example, you need two files, @~/.config/arvados/pirca.conf@ and @~/.config/arvados/dstcl.conf@. -Now you're ready to copy between *qr1hi* and *dst_cluster*! +Now you're ready to copy between *pirca* and *dstcl*! h3. How to copy a collection -First, select the uuid of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or from the URL bar (the part after @collections/...@) +First, determine the uuid or portable data hash of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or from the URL bar (the part after @collections/...@) -Now copy the collection from *qr1hi* to *dst_cluster*. We will use the uuid @qr1hi-4zz18-tci4vn4fa95w0zx@ as an example. You can find this collection in the lobSTR v.3 project on playground.arvados.org. +Now copy the collection from *pirca* to *dstcl*. We will use the uuid @jutro-4zz18-tv416l321i4r01e@ as an example. You can find this collection on playground.arvados.org. -
~$ arv-copy --src qr1hi --dst dst_cluster qr1hi-4zz18-tci4vn4fa95w0zx
-qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0%
-arvados.arv-copy[1234] INFO: Success: created copy with uuid dst_cluster-4zz18-8765943210cdbae
-
-
- -The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in a pre-created project, you can specify the project you want it to be in using the tag @--project-uuid@ followed by the project uuid. - -For example, this will copy the collection to project dst_cluster-j7d0g-a894213ukjhal12 in the destination cluster. - -
~$ arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx
+
~$ arv-copy --src pirca --dst dstcl jutro-4zz18-tv416l321i4r01e
+jutro-4zz18-tv416l321i4r01e: 6.1M / 6.1M 100.0%
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dstcl-4zz18-xxxxxxxxxxxxxxx
 
-h3. How to copy a pipeline template - -{% include 'arv_copy_expectations' %} - -We will use the uuid @qr1hi-p5p6p-9pkaxt6qjnkxhhu@ as an example pipeline template. +You can also copy by content address: -
~$ arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu
-To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
- * [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
-arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-p5p6p-rym2h5ub9m8ofwj
+
~$ arv-copy --src pirca --dst dstcl 2463fa9efeb75e099685528b3b9071e0+438
+2463fa9efeb75e099685528b3b9071e0+438: 6.1M / 6.1M 100.0%
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dstcl-4zz18-xxxxxxxxxxxxxxx
 
-New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@. - -By default, if you copy a pipeline template recursively, you will find that the template as well as all the dependencies are in your home project. +The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in an existing project, you can specify the project you want it to be in using the tag @--project-uuid@ followed by the project uuid. -If you would like to copy the object without dependencies, you can use the @--no-recursive@ tag. +For example, this will copy the collection to project dstcl-j7d0g-a894213ukjhal12 in the destination cluster. -For example, we can copy the same object using this tag. - - -
~$ arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-p5p6p-9pkaxt6qjnkxhhu
+ 
~$ arv-copy --src pirca --dst dstcl --project-uuid dstcl-j7d0g-a894213ukjhal12 jutro-4zz18-tv416l321i4r01e
 
h3. How to copy a workflow -We will use the uuid @zzzzz-7fd4e-sampleworkflow1@ as an example workflow. +We will use the uuid @jutro-7fd4e-mkmmq53m1ze6apx@ as an example workflow. -
~$ arv-copy --src zzzzz --dst dst_cluster --dst-git-repo $USER/tutorial zzzzz-7fd4e-sampleworkflow1
-zzzzz-4zz18-jidprdejysravcr: 1143M / 1143M 100.0%
-2017-01-04 04:11:58 arvados.arv-copy[5906] INFO:
-2017-01-04 04:11:58 arvados.arv-copy[5906] INFO: Success: created copy with uuid dst_cluster-7fd4e-ojtgpne594ubkt7
+
~$ arv-copy --src jutro --dst pirca --project-uuid pirca-j7d0g-ecak8knpefz8ere jutro-7fd4e-mkmmq53m1ze6apx
+ae480c5099b81e17267b7445e35b4bc7+180: 23M / 23M 100.0%
+2463fa9efeb75e099685528b3b9071e0+438: 156M / 156M 100.0%
+jutro-4zz18-vvvqlops0a0kpdl: 94M / 94M 100.0%
+2020-08-19 17:04:13 arvados.arv-copy[4789] INFO:
+2020-08-19 17:04:13 arvados.arv-copy[4789] INFO: Success: created copy with uuid pirca-7fd4e-s0tw9rfbkpo2fmx
 
-The name, description, and workflow definition from the original workflow will be used for the destination copy. In addition, any *locations* and *docker images* found in the src workflow definition will also be copied to the destination recursively. +The name, description, and workflow definition from the original workflow will be used for the destination copy. In addition, any *collections* and *docker images* referenced in the source workflow definition will also be copied to the destination. If you would like to copy the object without dependencies, you can use the @--no-recursive@ flag. - -For example, we can copy the same object non-recursively using the following: - - -
~$ arv-copy --src zzzzz --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive zzzzz-7fd4e-sampleworkflow1
-
-
diff --git a/doc/user/topics/arv-docker.html.textile.liquid b/doc/user/topics/arv-docker.html.textile.liquid index e9e8450268..bb1c7dd53e 100644 --- a/doc/user/topics/arv-docker.html.textile.liquid +++ b/doc/user/topics/arv-docker.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Customizing Crunch environment using Docker" +title: "Working with Docker images" ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -9,145 +9,80 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -This page describes how to customize the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a crunch script will be run in using "Docker.":https://www.docker.com/ Docker is a tool for building and running containers that isolate applications from other applications running on the same node. For detailed information about Docker, see the "Docker User Guide.":https://docs.docker.com/userguide/ +This page describes how to set up the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a workflow step will be run in using "Docker.":https://www.docker.com/ Docker is a tool for building and running containers that isolate applications from other applications running on the same node. 
For detailed information about Docker, see the "Docker User Guide.":https://docs.docker.com/userguide/ -This page will demonstrate how to: +This page describes: -# Fetch the arvados/jobs Docker image -# Manually install additional software into the container -# Create a new custom image -# Upload that image to Arvados for use by Crunch jobs -# Share your image with others +# "Create a custom image using a Dockerfile":#create +# "Uploading an image to Arvados":#upload +# "Sources of pre-built bioinformatics Docker images":#sources {% include 'tutorial_expectations_workstation' %} You also need ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker. You can test this by running @docker version@. If you receive a permission denied error, your user account may need to be added to the @docker@ group. If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin. -h2. Fetch a starting image +h2(#create). Create a custom image using a Dockerfile -The easiest way to begin is to start from the "arvados/jobs" image which already has the Arvados SDK installed along with other configuration required for use with Crunch. +This example shows how to create a Docker image and add the R package. -Download the latest "arvados/jobs" image from the Docker registry: +First, create a new directory called @docker-example-r-base@, and in that directory create a file called @Dockerfile@. -
$ docker pull arvados/jobs:latest
-Pulling repository arvados/jobs
-3132168f2acb: Download complete
-a42b7f2c59b6: Download complete
-e5afdf26a7ae: Download complete
-5cae48636278: Download complete
-7a4f91b70558: Download complete
-a04a275c1fd6: Download complete
-c433ff206a22: Download complete
-b2e539b45f96: Download complete
-073b2581c6be: Download complete
-593915af19dc: Download complete
-32260b35005e: Download complete
-6e5b860c1cde: Download complete
-95f0bfb43d4d: Download complete
-c7fd77eedb96: Download complete
-0d7685aafd00: Download complete
+
$ mkdir docker-example-r-base
+$ cd docker-example-r-base
 
-h2. Install new packages - -Next, enter the container using @docker run@, providing the arvados/jobs image and the program you want to run (in this case the bash shell). - -
$ docker run --interactive --tty --user root arvados/jobs /bin/bash
-root@fbf1d0f529d5:/#
+
FROM ubuntu:bionic
+RUN apt-get update && apt-get -yq --no-install-recommends install r-base-core
 
-Next, update the package list using @apt-get update@. +The "RUN" command is executed inside the container and can be any shell command line. You are not limited to installing Debian packages. You may compile programs or libraries from source and install them, edit systemwide configuration files, use other package managers such as @pip@ or @gem@, and perform any other customization necessary to run your program. - -
root@fbf1d0f529d5:/# apt-get update
-Get:2 http://apt.arvados.org stretch-dev InRelease [3260 B]
-Get:1 http://security-cdn.debian.org/debian-security stretch/updates InRelease [94.3 kB]
-Ign:3 http://cdn-fastly.deb.debian.org/debian stretch InRelease
-Get:4 http://cdn-fastly.deb.debian.org/debian stretch-updates InRelease [91.0 kB]
-Get:5 http://apt.arvados.org stretch-dev/main amd64 Packages [208 kB]
-Get:6 http://cdn-fastly.deb.debian.org/debian stretch Release [118 kB]
-Get:7 http://security-cdn.debian.org/debian-security stretch/updates/main amd64 Packages [499 kB]
-Get:8 http://cdn-fastly.deb.debian.org/debian stretch Release.gpg [2434 B]
-Get:9 http://cdn-fastly.deb.debian.org/debian stretch-updates/main amd64 Packages.diff/Index [10.6 kB]
-Get:10 http://cdn-fastly.deb.debian.org/debian stretch-updates/main amd64 Packages 2019-07-08-0821.07.pdiff [445 B]
-Get:10 http://cdn-fastly.deb.debian.org/debian stretch-updates/main amd64 Packages 2019-07-08-0821.07.pdiff [445 B]
-Fetched 1026 kB in 0s (1384 kB/s)
-Reading package lists... Done
-
-
+You can also visit the "Docker tutorial":https://docs.docker.com/get-started/part2/ for more information and examples. + +You should add your Dockerfiles to the same source control repository as the Workflows that use them. -In this example, we will install the "R" statistical language Debian package "r-base-core". Use @apt-get install@: +h3. Create a new image + +We're now ready to create a new Docker image. Use @docker build@ to create a new image from the Dockerfile. -
root@fbf1d0f529d5:/# apt-get install r-base-core
-Reading package lists... Done
-Building dependency tree
-Reading state information... Done
-The following additional packages will be installed:
-[...]
-done.
+
docker-example-r-base$ docker build -t docker-example-r-base .
 
+h3. Verify image + Now we can verify that "R" is installed: -
root@fbf1d0f529d5:/# R
+
$ docker run -ti docker-example-r-base
+root@57ec8f8b2663:/# R
 
-R version 3.3.3 (2017-03-06) -- "Another Canoe"
-Copyright (C) 2017 The R Foundation for Statistical Computing
+R version 3.4.4 (2018-03-15) -- "Someone to Lean On"
+Copyright (C) 2018 The R Foundation for Statistical Computing
 Platform: x86_64-pc-linux-gnu (64-bit)
-
-R is free software and comes with ABSOLUTELY NO WARRANTY.
-You are welcome to redistribute it under certain conditions.
-Type 'license()' or 'licence()' for distribution details.
-
-R is a collaborative project with many contributors.
-Type 'contributors()' for more information and
-'citation()' on how to cite R or R packages in publications.
-
-Type 'demo()' for some demos, 'help()' for on-line help, or
-'help.start()' for an HTML browser interface to help.
-Type 'q()' to quit R.
-
->
 
-Note that you are not limited to installing Debian packages. You may compile programs or libraries from source and install them, edit systemwide configuration files, use other package managers such as @pip@ or @gem@, and perform any other customization necessary to run your program. +h2(#upload). Upload your image -h2. Create a new image - -We're now ready to create a new Docker image. First, quit the container, then use @docker commit@ to create a new image from the stopped container. The container id can be found in the default hostname of the container displayed in the prompt, in this case @fbf1d0f529d5@: +Finally, we are ready to upload the new Docker image to Arvados. Use @arv-keepdocker@ with the image repository name to upload the image. Without arguments, @arv-keepdocker@ will print out the list of Docker images in Arvados that are available to you. -
root@fbf1d0f529d5:/# exit
-$ docker commit fbf1d0f529d5 arvados/jobs-with-r
-sha256:2818853ff9f9af5d7f77979803baac9c4710790ad2b84c1a754b02728fdff205
-$ docker images
-$ docker images |head
-REPOSITORY            TAG                 IMAGE ID            CREATED             SIZE
-arvados/jobs-with-r   latest              2818853ff9f9        9 seconds ago       703.1 MB
-arvados/jobs          latest              12b9f859d48c        4 days ago          362 MB
-
-
- -h2. Upload your image +
$ arv-keepdocker docker-example-r-base
+2020-06-29 13:48:19 arvados.arv_put[769] INFO: Creating new cache file at /home/peter/.cache/arvados/arv-put/39ddb51ebf6c5fcb3d713b5969466967
+206M / 206M 100.0% 2020-06-29 13:48:21 arvados.arv_put[769] INFO:
 
-Finally, we are ready to upload the new Docker image to Arvados.  Use @arv-keepdocker@ with the image repository name to upload the image.  Without arguments, @arv-keepdocker@ will print out the list of Docker images in Arvados that are available to you.
+2020-06-29 13:48:21 arvados.arv_put[769] INFO: Collection saved as 'Docker image docker-example-r-base:latest sha256:edd10'
+zzzzz-4zz18-0tayximqcyb6uf8
 
-
-
$ arv-keepdocker arvados/jobs-with-r
-703M / 703M 100.0%
-Collection saved as 'Docker image arvados/jobs-with-r:latest 2818853ff9f9'
-qr1hi-4zz18-abcdefghijklmno
-$ arv-keepdocker
+$ arv-keepdocker images
 REPOSITORY                      TAG         IMAGE ID      COLLECTION                     CREATED
-arvados/jobs-with-r             latest      2818853ff9f9  qr1hi-4zz18-abcdefghijklmno    Tue Jan 17 20:35:53 2017
+docker-example-r-base           latest      sha256:edd10  zzzzz-4zz18-0tayximqcyb6uf8    Mon Jun 29 17:46:16 2020
 
@@ -156,14 +91,24 @@ You are now able to specify the runtime environment for your program using @Dock
 hints:
   DockerRequirement:
-    dockerPull: arvados/jobs-with-r
+    dockerPull: docker-example-r-base
 
 -h2. Share Docker images +h3. Uploading Docker images to a shared project -Docker images are subject to normal Arvados permissions. If wish to share your Docker image with others (or wish to share a pipeline template that uses your Docker image) you will need to use @arv-keepdocker@ with the @--project-uuid@ option to upload the image to a shared project. +Docker images are subject to normal Arvados permissions. If you wish to share your Docker image with others you should use @arv-keepdocker@ with the @--project-uuid@ option to add the image to a shared project and ensure that metadata is set correctly. -
$ arv-keepdocker arvados/jobs-with-r --project-uuid qr1hi-j7d0g-xxxxxxxxxxxxxxx
+
$ arv-keepdocker docker-example-r-base --project-uuid zzzzz-j7d0g-xxxxxxxxxxxxxxx
 
+ +h2(#sources). Sources of pre-built images + +In addition to creating your own containers, there are a number of resources where you can find bioinformatics tools already wrapped in container images: + +"BioContainers":https://biocontainers.pro/ + +"Dockstore":https://dockstore.org/ + +"Docker Hub":https://hub.docker.com/ diff --git a/doc/user/topics/arv-web.html.textile.liquid b/doc/user/topics/arv-web.html.textile.liquid deleted file mode 100644 index 9671e97096..0000000000 --- a/doc/user/topics/arv-web.html.textile.liquid +++ /dev/null @@ -1,106 +0,0 @@ ---- -layout: default -navsection: userguide -title: "Using arv-web" -... -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -@arv-web@ enables you to run a custom web service from the contents of an Arvados collection. - -{% include 'tutorial_expectations_workstation' %} - -h2. Usage - -@arv-web@ enables you to set up a web service based on the most recent collection in a project. An arv-web application is a reproducible, immutable application bundle where the web app is packaged with both the code to run and the data to serve. Because Arvados Collections can be updated with minimum duplication, it is efficient to produce a new application bundle when the code or data needs to be updated; retaining old application bundles makes it easy to go back and run older versions of your web app. - -
-$ cd $HOME/arvados/services/arv-web
-usage: arv-web.py [-h] --project-uuid PROJECT_UUID [--port PORT]
-                  [--image IMAGE]
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --project-uuid PROJECT_UUID
-                        Project uuid to watch
-  --port PORT           Host port to listen on (default 8080)
-  --image IMAGE         Docker image to run
-
- -At startup, @arv-web@ queries an Arvados project and mounts the most recently modified collection into a temporary directory. It then runs a Docker image with the collection bound to @/mnt@ inside the container. When a new collection is added to the project, or an existing project is updated, it will stop the running Docker container, unmount the old collection, mount the new most recently modified collection, and restart the Docker container with the new mount. - -h2. Docker container - -The @Dockerfile@ in @arvados/docker/arv-web@ builds a Docker image that runs Apache with @/mnt@ as the DocumentRoot. It is configured to run web applications which use Python WSGI, Ruby Rack, or CGI; to serve static HTML; or browse the contents of the @public@ subdirectory of the collection using default Apache index pages. - -To build the Docker image: - - -
~$ cd arvados/docker
-~/arvados/docker$ docker build -t arvados/arv-web arv-web
-
-
- -h2. Running sample applications - -First, in Arvados Workbench, create a new project. Copy the project UUID from the URL bar (this is the part of the URL after @projects/...@). - -Now upload a collection containing a "Python WSGI web app:":http://wsgi.readthedocs.org/en/latest/ - - -
~$ cd arvados/services/arv-web
-~/arvados/services/arv-web$ arv-put --project [zzzzz-j7d0g-yourprojectuuid] --name sample-wsgi-app sample-wsgi-app
-0M / 0M 100.0%
-Collection saved as 'sample-wsgi-app'
-zzzzz-4zz18-ebohzfbzh82qmqy
-~/arvados/services/arv-web$ ./arv-web.py --project [zzzzz-j7d0g-yourprojectuuid] --port 8888
-2015-01-30 11:21:00 arvados.arv-web[4897] INFO: Mounting zzzzz-4zz18-ebohzfbzh82qmqy
-2015-01-30 11:21:01 arvados.arv-web[4897] INFO: Starting Docker container arvados/arv-web
-2015-01-30 11:21:02 arvados.arv-web[4897] INFO: Container id e79e70558d585a3e038e4bfbc97e5c511f21b6101443b29a8017bdf3d84689a3
-2015-01-30 11:21:03 arvados.arv-web[4897] INFO: Waiting for events
-
-
- -The sample application will be available at @http://localhost:8888@. - -h3. Updating the application - -If you upload a new collection to the same project, arv-web will restart the web service and serve the new collection. For example, uploading a collection containing a "Ruby Rack web app:":https://github.com/rack/rack/wiki - - -
~$ cd arvados/services/arv-web
-~/arvados/services/arv-web$ arv-put --project [zzzzz-j7d0g-yourprojectuuid] --name sample-rack-app sample-rack-app
-0M / 0M 100.0%
-Collection saved as 'sample-rack-app'
-zzzzz-4zz18-dhhm0ay8k8cqkvg
-
-
- -@arv-web@ will automatically notice the change, load a new container, and send an update signal (SIGHUP) to the service: - -
-2015-01-30 11:21:03 arvados.arv-web[4897] INFO:Waiting for events
-2015-01-30 11:21:04 arvados.arv-web[4897] INFO:create zzzzz-4zz18-dhhm0ay8k8cqkvg
-2015-01-30 11:21:05 arvados.arv-web[4897] INFO:Mounting zzzzz-4zz18-dhhm0ay8k8cqkvg
-2015-01-30 11:21:06 arvados.arv-web[4897] INFO:Sending refresh signal to container
-2015-01-30 11:21:07 arvados.arv-web[4897] INFO:Waiting for events
-
- -h2. Writing your own applications - -The @arvados/arv-web@ image serves Python and Ruby applications using Phusion Passenger and Apache @mod_passenger@. See "Phusion Passenger users guide for Apache":https://www.phusionpassenger.com/documentation/Users%20guide%20Apache.html for details, and look at the sample apps @arvados/services/arv-web/sample-wsgi-app@ and @arvados/services/arv-web/sample-rack-app@. - -You can serve CGI applications using standard Apache CGI support. See "Apache Tutorial: Dynamic Content with CGI":https://httpd.apache.org/docs/current/howto/cgi.html for details, and look at the sample app @arvados/services/arv-web/sample-cgi-app@. - -You can also serve static content from the @public@ directory of the collection. Look at @arvados/services/arv-web/sample-static-page@ for an example. If no @index.html@ is found in @public/@, it will render default Apache index pages, permitting simple browsing of the collection contents. - -h3. Custom images - -You can provide your own Docker image. The Docker image that will be used create the web application container is specified in the @docker_image@ file in the root of the collection. You can also specify @--image@ on the command @arv-web@ line to choose the docker image (this will override the contents of @docker_image@). - -h3. Reloading the web service - -Stopping the Docker container and starting it again can result in a small amount of downtime. When the collection containing a new or updated web application uses the same Docker image as the currently running web application, it is possible to avoid this downtime by keeping the existing container and only reloading the web server. This is accomplished by providing a file called @reload@ in the root of the collection, which should contain the commands necessary to reload the web server inside the container. 
diff --git a/doc/user/topics/keep.html.textile.liquid b/doc/user/topics/keep.html.textile.liquid index 68b6a87d09..c415cebbdd 100644 --- a/doc/user/topics/keep.html.textile.liquid +++ b/doc/user/topics/keep.html.textile.liquid @@ -38,7 +38,7 @@ notextile.
~$ cd /scratch/you
 
 When you run this command, you may get this API warning:
 
-notextile. 
WARNING:root:API lookup failed for collection 204e43b8a1185621ca55a94839582e6f+67108864 (<class 'apiclient.errors.HttpError'>: <HttpError 404 when requesting https://qr1hi.arvadosapi.com/arvados/v1/collections/204e43b8a1185621ca55a94839582e6f%2B67108864?alt=json returned "Not Found">)
+notextile.
WARNING:root:API lookup failed for collection 204e43b8a1185621ca55a94839582e6f+67108864 (<class 'apiclient.errors.HttpError'>: <HttpError 404 when requesting https://zzzzz.arvadosapi.com/arvados/v1/collections/204e43b8a1185621ca55a94839582e6f%2B67108864?alt=json returned "Not Found">)
This happens because @arv-get@ tries to find a collection with this identifier. When that fails, it emits this warning, then looks for a datablock instead, which succeeds. diff --git a/doc/user/topics/tutorial-gatk-variantfiltration.html.textile.liquid b/doc/user/topics/tutorial-gatk-variantfiltration.html.textile.liquid deleted file mode 100644 index 544ccbd35e..0000000000 --- a/doc/user/topics/tutorial-gatk-variantfiltration.html.textile.liquid +++ /dev/null @@ -1,173 +0,0 @@ ---- -layout: default -navsection: userguide -title: "Using GATK with Arvados" -... -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -This tutorial demonstrates how to use the Genome Analysis Toolkit (GATK) with Arvados. In this example we will install GATK and then create a VariantFiltration job to assign pass/fail scores to variants in a VCF file. - -{% include 'tutorial_expectations' %} - -h2. Installing GATK - -Download the GATK binary tarball[1] -- e.g., @GenomeAnalysisTK-2.6-4.tar.bz2@ -- and "copy it to your Arvados VM":{{site.baseurl}}/user/tutorials/tutorial-keep.html. - - -
~$ arv-put GenomeAnalysisTK-2.6-4.tar.bz2
-c905c8d8443a9c44274d98b7c6cfaa32+94
-
-
- -Next, you need the GATK Resource Bundle[2]. This may already be available in Arvados. If not, you will need to download the files listed below and put them into Keep. - - -
~$ arv keep ls -s d237a90bae3870b3b033aea1e99de4a9+10820
-  50342 1000G_omni2.5.b37.vcf.gz
-      1 1000G_omni2.5.b37.vcf.gz.md5
-    464 1000G_omni2.5.b37.vcf.idx.gz
-      1 1000G_omni2.5.b37.vcf.idx.gz.md5
-  43981 1000G_phase1.indels.b37.vcf.gz
-      1 1000G_phase1.indels.b37.vcf.gz.md5
-    326 1000G_phase1.indels.b37.vcf.idx.gz
-      1 1000G_phase1.indels.b37.vcf.idx.gz.md5
- 537210 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.gz
-      1 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.gz.md5
-   3473 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.idx.gz
-      1 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.idx.gz.md5
-  19403 Mills_and_1000G_gold_standard.indels.b37.vcf.gz
-      1 Mills_and_1000G_gold_standard.indels.b37.vcf.gz.md5
-    536 Mills_and_1000G_gold_standard.indels.b37.vcf.idx.gz
-      1 Mills_and_1000G_gold_standard.indels.b37.vcf.idx.gz.md5
-  29291 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.gz.md5
-    565 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.idx.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.idx.gz.md5
-  37930 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.gz.md5
-    592 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.idx.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.idx.gz.md5
-5898484 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam
-    112 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam.bai.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam.bai.gz.md5
-      1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam.md5
-   3837 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.gz.md5
-     65 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.idx.gz
-      1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.idx.gz.md5
- 275757 dbsnp_137.b37.excluding_sites_after_129.vcf.gz
-      1 dbsnp_137.b37.excluding_sites_after_129.vcf.gz.md5
-   3735 dbsnp_137.b37.excluding_sites_after_129.vcf.idx.gz
-      1 dbsnp_137.b37.excluding_sites_after_129.vcf.idx.gz.md5
- 998153 dbsnp_137.b37.vcf.gz
-      1 dbsnp_137.b37.vcf.gz.md5
-   3890 dbsnp_137.b37.vcf.idx.gz
-      1 dbsnp_137.b37.vcf.idx.gz.md5
-  58418 hapmap_3.3.b37.vcf.gz
-      1 hapmap_3.3.b37.vcf.gz.md5
-    999 hapmap_3.3.b37.vcf.idx.gz
-      1 hapmap_3.3.b37.vcf.idx.gz.md5
-      3 human_g1k_v37.dict.gz
-      1 human_g1k_v37.dict.gz.md5
-      2 human_g1k_v37.fasta.fai.gz
-      1 human_g1k_v37.fasta.fai.gz.md5
- 849537 human_g1k_v37.fasta.gz
-      1 human_g1k_v37.fasta.gz.md5
-      1 human_g1k_v37.stats.gz
-      1 human_g1k_v37.stats.gz.md5
-      3 human_g1k_v37_decoy.dict.gz
-      1 human_g1k_v37_decoy.dict.gz.md5
-      2 human_g1k_v37_decoy.fasta.fai.gz
-      1 human_g1k_v37_decoy.fasta.fai.gz.md5
- 858592 human_g1k_v37_decoy.fasta.gz
-      1 human_g1k_v37_decoy.fasta.gz.md5
-      1 human_g1k_v37_decoy.stats.gz
-      1 human_g1k_v37_decoy.stats.gz.md5
-
-
- -h2. Submit a GATK job - -The Arvados distribution includes an example crunch script ("crunch_scripts/GATK2-VariantFiltration":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/GATK2-VariantFiltration) that runs the GATK VariantFiltration tool with some default settings. - - -
~$ src_version=76588bfc57f33ea1b36b82ca7187f465b73b4ca4
-~$ vcf_input=5ee633fe2569d2a42dd81b07490d5d13+82
-~$ gatk_binary=c905c8d8443a9c44274d98b7c6cfaa32+94
-~$ gatk_bundle=d237a90bae3870b3b033aea1e99de4a9+10820
-~$ cat >the_job <<EOF
-{
- "script":"GATK2-VariantFiltration",
- "repository":"arvados",
- "script_version":"$src_version",
- "script_parameters":
- {
-  "input":"$vcf_input",
-  "gatk_binary_tarball":"$gatk_binary",
-  "gatk_bundle":"$gatk_bundle"
- }
-}
-EOF
-
-
- -* @"input"@ is collection containing the source VCF data. Here we are using an exome report from PGP participant hu34D5B9. -* @"gatk_binary_tarball"@ is a Keep collection containing the GATK 2 binary distribution[1] tar file. -* @"gatk_bundle"@ is a Keep collection containing the GATK resource bundle[2]. - -Now start a job: - - -
~$ arv job create --job "$(cat the_job)"
-{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/jobs/qr1hi-8i9sb-n9k7qyp7bs5b9d4",
- "kind":"arvados#job",
- "etag":"9j99n1feoxw3az448f8ises12",
- "uuid":"qr1hi-8i9sb-n9k7qyp7bs5b9d4",
- "owner_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "created_at":"2013-12-17T19:02:15Z",
- "modified_by_client_uuid":"qr1hi-ozdt8-obw7foaks3qjyej",
- "modified_by_user_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "modified_at":"2013-12-17T19:02:15Z",
- "updated_at":"2013-12-17T19:02:15Z",
- "submit_id":null,
- "priority":null,
- "script":"GATK2-VariantFiltration",
- "script_parameters":{
-  "input":"5ee633fe2569d2a42dd81b07490d5d13+82",
-  "gatk_binary_tarball":"c905c8d8443a9c44274d98b7c6cfaa32+94",
-  "gatk_bundle":"d237a90bae3870b3b033aea1e99de4a9+10820"
- },
- "script_version":"76588bfc57f33ea1b36b82ca7187f465b73b4ca4",
- "cancelled_at":null,
- "cancelled_by_client_uuid":null,
- "cancelled_by_user_uuid":null,
- "started_at":null,
- "finished_at":null,
- "output":null,
- "success":null,
- "running":null,
- "is_locked_by_uuid":null,
- "log":null,
- "runtime_constraints":{},
- "tasks_summary":{}
-}
-
-
- -Once the job completes, the output can be found in hu34D5B9-exome-filtered.vcf: - -
~$ arv keep ls bedd6ff56b3ae9f90d873b1fcb72f9a3+91
-hu34D5B9-exome-filtered.vcf
-
-
- -h2. Notes - -fn1. "Download the GATK tools":http://www.broadinstitute.org/gatk/download - -fn2. "Information about the GATK resource bundle":http://gatkforums.broadinstitute.org/discussion/1213/whats-in-the-resource-bundle-and-how-can-i-get-it and "direct download link":ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/2.5/b37/ (if prompted, submit an empty password) diff --git a/doc/user/topics/tutorial-job1.html.textile.liquid b/doc/user/topics/tutorial-job1.html.textile.liquid deleted file mode 100644 index f7a2060101..0000000000 --- a/doc/user/topics/tutorial-job1.html.textile.liquid +++ /dev/null @@ -1,214 +0,0 @@ ---- -layout: default -navsection: userguide -title: "Running a Crunch job on the command line" -... -{% comment %} -Copyright (C) The Arvados Authors. All rights reserved. - -SPDX-License-Identifier: CC-BY-SA-3.0 -{% endcomment %} - -This tutorial introduces how to run individual Crunch jobs using the @arv@ command line tool. - -{% include 'tutorial_expectations' %} - -You will create a job to run the "hash" Crunch script. The "hash" script computes the MD5 hash of each file in a collection. - -h2. Jobs - -Crunch pipelines consist of one or more jobs. A "job" is a single run of a specific version of a Crunch script with a specific input. You can also run jobs individually. - -A request to run a Crunch job are is described using a JSON object. For example: - - -
~$ cat >~/the_job <<EOF
-{
- "script": "hash",
- "repository": "arvados",
- "script_version": "master",
- "script_parameters": {
-  "input": "c1bad4b39ca5a924e481008009d94e32+210"
- },
- "no_reuse": "true"
-}
-EOF
-
-
- -* @cat@ is a standard Unix utility that writes a sequence of input to standard output. -* @<~/the_job@ redirects standard output to a file called @~/the_job@. -* @"repository"@ is the name of a Git repository to search for the script version. You can access a list of available git repositories on the Arvados Workbench under "*Code repositories*":{{site.arvados_workbench_host}}/repositories. -* @"script_version"@ specifies the version of the script that you wish to run. This can be in the form of an explicit Git revision hash, a tag, or a branch. Arvados logs the script version that was used in the run, enabling you to go back and re-run any past job with the guarantee that the exact same code will be used as was used in the previous run. -* @"script"@ specifies the name of the script to run. The script must be given relative to the @crunch_scripts/@ subdirectory of the Git repository. -* @"script_parameters"@ are provided to the script. In this case, the input is the PGP data Collection that we "put in Keep earlier":{{site.baseurl}}/user/tutorials/tutorial-keep.html. -* Setting the @"no_reuse"@ flag tells Crunch not to reuse work from past jobs. This helps ensure that you can watch a new Job process for the rest of this tutorial, without reusing output from a past run that you made, or somebody else marked as public. (If you want to experiment, after the first run below finishes, feel free to edit this job to remove the @"no_reuse"@ line and resubmit it. See what happens!) - -Use @arv job create@ to actually submit the job. It should print out a JSON object which describes the newly created job: - - -
~$ arv job create --job "$(cat ~/the_job)"
-{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/jobs/qr1hi-8i9sb-1pm1t02dezhupss",
- "kind":"arvados#job",
- "etag":"ax3cn7w9whq2hdh983yxvq09p",
- "uuid":"qr1hi-8i9sb-1pm1t02dezhupss",
- "owner_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "created_at":"2013-12-16T20:44:32Z",
- "modified_by_client_uuid":"qr1hi-ozdt8-obw7foaks3qjyej",
- "modified_by_user_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "modified_at":"2013-12-16T20:44:32Z",
- "updated_at":"2013-12-16T20:44:33Z",
- "submit_id":null,
- "priority":null,
- "script":"hash",
- "script_parameters":{
-  "input":"c1bad4b39ca5a924e481008009d94e32+210"
- },
- "script_version":"d9cd657b733d578ac0d2167dd75967aa4f22e0ac",
- "cancelled_at":null,
- "cancelled_by_client_uuid":null,
- "cancelled_by_user_uuid":null,
- "started_at":null,
- "finished_at":null,
- "output":null,
- "success":null,
- "running":null,
- "is_locked_by_uuid":null,
- "log":null,
- "runtime_constraints":{},
- "tasks_summary":{}
-}
-
-
- -The job is now queued and will start running as soon as it reaches the front of the queue. Fields to pay attention to include: - - * @"uuid"@ is the unique identifier for this specific job. - * @"script_version"@ is the actual revision of the script used. This is useful if the version was described using the "repository:branch" format. - -h2. Monitor job progress - -Go to "*Recent jobs*":{{site.arvados_workbench_host}}/jobs in Workbench. Your job should be near the top of the table. This table refreshes automatically. When the job has completed successfully, it will show finished in the *Status* column. - -h2. Inspect the job output - -On the "Workbench Dashboard":{{site.arvados_workbench_host}}, look for the *Output* column of the *Recent jobs* table. Click on the link under *Output* for your job to go to the files page with the job output. The files page lists all the files that were output by the job. Click on the link under the *file* column to view a file, or click on the download button to download the output file. - -On the command line, you can use @arv job get@ to access a JSON object describing the output: - - -
~$ arv job get --uuid qr1hi-8i9sb-xxxxxxxxxxxxxxx
-{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/jobs/qr1hi-8i9sb-1pm1t02dezhupss",
- "kind":"arvados#job",
- "etag":"1bk98tdj0qipjy0rvrj03ta5r",
- "uuid":"qr1hi-8i9sb-1pm1t02dezhupss",
- "owner_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "created_at":"2013-12-16T20:44:32Z",
- "modified_by_client_uuid":null,
- "modified_by_user_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "modified_at":"2013-12-16T20:44:55Z",
- "updated_at":"2013-12-16T20:44:55Z",
- "submit_id":null,
- "priority":null,
- "script":"hash",
- "script_parameters":{
-  "input":"c1bad4b39ca5a924e481008009d94e32+210"
- },
- "script_version":"d9cd657b733d578ac0d2167dd75967aa4f22e0ac",
- "cancelled_at":null,
- "cancelled_by_client_uuid":null,
- "cancelled_by_user_uuid":null,
- "started_at":"2013-12-16T20:44:36Z",
- "finished_at":"2013-12-16T20:44:53Z",
- "output":"dd755dbc8d49a67f4fe7dc843e4f10a6+54",
- "success":true,
- "running":false,
- "is_locked_by_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "log":"2afdc6c8b67372ffd22d8ce89d35411f+91",
- "runtime_constraints":{},
- "tasks_summary":{
-  "done":2,
-  "running":0,
-  "failed":0,
-  "todo":0
- }
-}
-
-
- -* @"output"@ is the unique identifier for this specific job's output. This is a Keep collection. Because the output of Arvados jobs should be deterministic, the known expected output is dd755dbc8d49a67f4fe7dc843e4f10a6+54. - -Now you can list the files in the collection: - - -
~$ arv keep ls dd755dbc8d49a67f4fe7dc843e4f10a6+54
-./md5sum.txt
-
-
- -This collection consists of the @md5sum.txt@ file. Use @arv-get@ to show the contents of the @md5sum.txt@ file: - - -
~$ arv-get dd755dbc8d49a67f4fe7dc843e4f10a6+54/md5sum.txt
-44b8ae3fde7a8a88d2f7ebd237625b4f ./var-GS000016015-ASM.tsv.bz2
-
-
- -This MD5 hash matches the MD5 hash which we "computed earlier":{{site.baseurl}}/user/tutorials/tutorial-keep.html. - -h2. The job log - -When the job completes, you can access the job log. On the Workbench, visit "*Recent jobs*":{{site.arvados_workbench_host}}/jobs %(rarr)→% your job's UUID under the *uuid* column %(rarr)→% the collection link on the *log* row. - -On the command line, the Keep identifier listed in the @"log"@ field from @arv job get@ specifies a collection. You can list the files in the collection: - - -
~$ arv keep ls xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91
-./qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt
-
-
- -The log collection consists of one log file named with the job's UUID. You can access it using @arv-get@: - - -
~$ arv-get xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91/qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt
-2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  check slurm allocation
-2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  node compute13 - 8 slots
-2013-12-16_20:44:36 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  start
-2013-12-16_20:44:36 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  Install revision d9cd657b733d578ac0d2167dd75967aa4f22e0ac
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  Clean-work-dir exited 0
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  Install exited 0
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  script hash
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  script_version d9cd657b733d578ac0d2167dd75967aa4f22e0ac
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  script_parameters {"input":"c1bad4b39ca5a924e481008009d94e32+210"}
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  runtime_constraints {"max_tasks_per_node":0}
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  start level 0
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  status: 0 done, 0 running, 1 todo
-2013-12-16_20:44:38 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 job_task qr1hi-ot0gb-23c1k3kwrf8da62
-2013-12-16_20:44:38 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 child 7681 started on compute13.1
-2013-12-16_20:44:38 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  status: 0 done, 1 running, 0 todo
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 child 7681 on compute13.1 exit 0 signal 0 success=true
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 success in 1 seconds
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 output
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  wait for last 0 children to finish
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  status: 1 done, 0 running, 1 todo
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  start level 1
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  status: 1 done, 0 running, 1 todo
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 job_task qr1hi-ot0gb-iwr0o3unqothg28
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 child 7716 started on compute13.1
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  status: 1 done, 1 running, 0 todo
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 child 7716 on compute13.1 exit 0 signal 0 success=true
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 success in 13 seconds
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 output dd755dbc8d49a67f4fe7dc843e4f10a6+54
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  wait for last 0 children to finish
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  status: 2 done, 0 running, 0 todo
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  release job allocation
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  Freeze not implemented
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  collate
-2013-12-16_20:44:53 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  output dd755dbc8d49a67f4fe7dc843e4f10a6+54+K@qr1hi
-2013-12-16_20:44:53 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  finish
-
-
diff --git a/doc/user/tutorials/add-new-repository.html.textile.liquid b/doc/user/tutorials/add-new-repository.html.textile.liquid index 9d8e768a78..e28b961238 100644 --- a/doc/user/tutorials/add-new-repository.html.textile.liquid +++ b/doc/user/tutorials/add-new-repository.html.textile.liquid @@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -Arvados repositories are managed through the Git revision control system. You can use these repositories to store your crunch scripts and run them in the arvados cluster. +Arvados supports managing git repositories. You can access these repositories using your Arvados credentials and share them with other Arvados users. {% include 'tutorial_expectations' %} diff --git a/doc/user/tutorials/git-arvados-guide.html.textile.liquid b/doc/user/tutorials/git-arvados-guide.html.textile.liquid index 2e255219d2..ad719a66e4 100644 --- a/doc/user/tutorials/git-arvados-guide.html.textile.liquid +++ b/doc/user/tutorials/git-arvados-guide.html.textile.liquid @@ -9,20 +9,13 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -This tutorial describes how to work with a new Arvados git repository. Working with an Arvados git repository is analogous to working with other public git repositories. It will show you how to upload custom scripts to a remote Arvados repository, so you can use it in Arvados pipelines. +This tutorial describes how to work with an Arvados-managed git repository. Working with an Arvados git repository is very similar to working with other public git repositories. {% include 'tutorial_expectations' %} {% include 'tutorial_git_repo_expectations' %} -{% include 'notebox_begin' %} -For more information about using Git, try - -
$ man gittutorial
-
or *"search Google for Git tutorials":http://google.com/#q=git+tutorial*. -{% include 'notebox_end' %} - -h2. Cloning an Arvados repository +h2. Cloning a git repository Before you start using Git, you should do some basic configuration (you only need to do this the first time): @@ -65,33 +58,22 @@ Create a git branch named *tutorial_branch* in the *tutorial* Arvados git reposi h2. Adding scripts to an Arvados repository -Arvados crunch scripts need to be added in a *crunch_scripts* subdirectory in the repository. If this subdirectory does not exist, first create it in the local repository and change to that directory: - - -
~/tutorial$ mkdir crunch_scripts
-~/tutorial$ cd crunch_scripts
-
- -Next, using @nano@ or your favorite Unix text editor, create a new file called @hash.py@ in the @crunch_scripts@ directory. - -notextile.
~/tutorial/crunch_scripts$ nano hash.py
- -Add the following code to compute the MD5 hash of each file in a collection +A git repository is a good place to store the CWL workflows that you run on Arvados. - {% code 'tutorial_hash_script_py' as python %} +First, create a simple CWL CommandLineTool: -Make the file executable: +notextile.
~/tutorial$ nano hello.cwl
-notextile.
~/tutorial/crunch_scripts$ chmod +x hash.py
+ {% code 'tutorial_hello_cwl' as yaml %} Next, add the file to the git repository. This tells @git@ that the file should be included on the next commit. -notextile.
~/tutorial/crunch_scripts$ git add hash.py
+notextile.
~/tutorial$ git add hello.cwl
Next, commit your changes. All staged changes are recorded into the local git repository: -
~/tutorial/crunch_scripts$ git commit -m "my first script"
+
~/tutorial$ git commit -m "my first script"
 
@@ -102,4 +84,4 @@ Finally, upload your changes to the remote repository:
-Although this tutorial shows how to add a python script to Arvados, the same steps can be used to add any of your custom bash, R, or python scripts to an Arvados repository. +The same steps can be used to add any of your custom bash, R, or python scripts to an Arvados repository. diff --git a/doc/user/tutorials/tutorial-keep-collection-lifecycle.html.textile.liquid b/doc/user/tutorials/tutorial-keep-collection-lifecycle.html.textile.liquid index 2375e8b3d5..c4b9e31ece 100644 --- a/doc/user/tutorials/tutorial-keep-collection-lifecycle.html.textile.liquid +++ b/doc/user/tutorials/tutorial-keep-collection-lifecycle.html.textile.liquid @@ -11,45 +11,42 @@ SPDX-License-Identifier: CC-BY-SA-3.0 During it's lifetime, a keep collection can be in various states. These states are *persisted*, *expiring*, *trashed* and *permanently deleted*. +The nominal state is *persisted* which means the data can be accessed normally and will be retained indefinitely. + A collection is *expiring* when it has a *trash_at* time in the future. An expiring collection can be accessed as normal, but is scheduled to be trashed automatically at the *trash_at* time. A collection is *trashed* when it has a *trash_at* time in the past. The *is_trashed* attribute will also be "true". The delete operation immediately puts the collection in the trash by setting the *trash_at* time to "now". Once trashed, the collection is no longer readable through normal data access APIs. The collection will have *delete_at* set to some time in the future. The trashed collection is recoverable until the delete_at time passes, at which point the collection is permanently deleted. -# "*Collection lifecycle attributes*":#collection_attributes # "*Deleting / trashing collections*":#delete-collection # "*Recovering trashed collections*":#trash-recovery +# "*Collection lifecycle attributes*":#collection_attributes {% include 'tutorial_expectations' %} -h2(#collection_attributes). 
Collection lifecycle attributes - -As listed above the attributes that are used to manage a collection lifecycle are it's *is_trashed*, *trash_at*, and *delete_at*. The table below lists the values of these attributes and how they influence the state of a collection and it's accessibility. - -table(table table-bordered table-condensed). -|_. collection state|_. is_trashed|_. trash_at|_. delete_at|_. get|_. list|_. list?include_trash=true|_. can be modified| -|persisted collection|false |null |null |yes |yes |yes |yes | -|expiring collection|false |future |future |yes |yes |yes |yes | -|trashed collection|true |past |future |no |no |yes |only is_trashed, trash_at and delete_at attribtues| -|deleted collection|true|past |past |no |no |no |no | - h2(#delete-collection). Deleting / trashing collections A collection can be deleted using either the arv command line tool or the workbench. +h3. Trashing a collection using workbench + +To trash a collection using workbench, go to the Data collections tab in the project, and use the trash icon for this collection row. + h3. Trashing a collection using arv command line tool
-arv collection delete --uuid=qr1hi-4zz18-xxxxxxxxxxxxxxx
+arv collection delete --uuid=zzzzz-4zz18-xxxxxxxxxxxxxxx
 
-h3. Trashing a collection using workbench - -To trash a collection using workbench, go to the Data collections tab in the project, and use the trash icon for this collection row. - h2(#trash-recovery). Recovering trashed collections A collection can be un-trashed / recovered using either the arv command line tool or the workbench. +h3. Un-trashing a collection using workbench + +To untrash a collection using workbench, go to trash page on workbench by clicking on the "Trash" icon in the top navigation in workbench and use the recycle icon or selection dropdown option. + +!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/trash-button-topnav.png! + h3. Un-trashing a collection using arv command line tool You can list the trashed collections using the list command. @@ -61,11 +58,16 @@ arv collection list --include-trash=true --filters '[["is_trashed", "=", "true"] You can then untrash a particular collection using arv using it's uuid.
-arv collection untrash --uuid=qr1hi-4zz18-xxxxxxxxxxxxxxx
+arv collection untrash --uuid=zzzzz-4zz18-xxxxxxxxxxxxxxx
 
-h3. Un-trashing a collection using workbench +h2(#collection_attributes). Collection lifecycle attributes -To untrash a collection using workbench, go to trash page on workbench by clicking on the "Trash" icon in the top navigation in workbench and use the recycle icon or selection dropdown option. +As listed above, the attributes that are used to manage a collection lifecycle are its *is_trashed*, *trash_at*, and *delete_at*. The table below lists the values of these attributes and how they influence the state of a collection and its accessibility. -!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/trash-button-topnav.png! +table(table table-bordered table-condensed). +|_. collection state|_. is_trashed|_. trash_at|_. delete_at|_. get|_. list|_. list?include_trash=true|_. can be modified| +|persisted collection|false |null |null |yes |yes |yes |yes | +|expiring collection|false |future |future |yes |yes |yes |yes | +|trashed collection|true |past |future |no |no |yes |only is_trashed, trash_at and delete_at attributes| +|deleted collection|true|past |past |no |no |no |no | diff --git a/doc/user/tutorials/tutorial-keep-get.html.textile.liquid b/doc/user/tutorials/tutorial-keep-get.html.textile.liquid index f206d302de..05924f8475 100644 --- a/doc/user/tutorials/tutorial-keep-get.html.textile.liquid +++ b/doc/user/tutorials/tutorial-keep-get.html.textile.liquid @@ -11,11 +11,39 @@ SPDX-License-Identifier: CC-BY-SA-3.0 Arvados Data collections can be downloaded using either the arv commands or using Workbench. -# "*Downloading using arv commands*":#download-using-arv -# "*Downloading using Workbench*":#download-using-workbench -# "*Downloading a shared collection using Workbench*":#download-shared-collection +# "*Download using Workbench*":#download-using-workbench +# "*Sharing collections*":#download-shared-collection +# "*Download using command line tools*":#download-using-arv -h2(#download-using-arv). 
Downloading using arv commands +h2(#download-using-workbench). Download using Workbench + +You can also download Arvados data collections using the Workbench. + +Visit the Workbench *Dashboard*. Click on *Projects* dropdown menu in the top navigation menu, select your *Home* project. You will see the *Data collections* tab, which lists the collections in this project. + +You can access the contents of a collection by clicking on the * Show* button next to the collection. This will take you to the collection's page. Using this page you can see the collection's contents, and download individual files. + +You can now download the collection files by clicking on the button(s). + +h2(#download-shared-collection). Sharing collections + +h3. Sharing with other Arvados users + +Collections can be shared with other users on the Arvados cluster by sharing the parent project. Navigate to the parent project using the "breadcrumbs" bar, then click on the *Sharing* tab. From the sharing tab, you can choose which users or groups to share with, and their level of access. + +h3. Creating a special download URL + +To share a collection with users that do not have an account on your Arvados cluster, visit the collection page using Workbench as described in the above section. Once on this page, click on the Create sharing link button. + +This will create a sharing link for the collection as shown below. You can copy the sharing link in this page and share it with other users. + +!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/shared-collection.png! + +A user with this url can download this collection by simply accessing this url using browser. It will present a downloadable version of the collection as shown below. + +!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/download-shared-collection.png! + +h2(#download-using-arv). 
Download using command line tools {% include 'tutorial_expectations' %} @@ -24,38 +52,35 @@ You can download Arvados data collections using the command line tools @arv-ls@ Use @arv-ls@ to view the contents of a collection: -
~$ arv-ls c1bad4b39ca5a924e481008009d94e32+210
-var-GS000016015-ASM.tsv.bz2
+
~$ arv-ls ae480c5099b81e17267b7445e35b4bc7+180
+./HWI-ST1027_129_D0THKACXX.1_1.fastq
+./HWI-ST1027_129_D0THKACXX.1_2.fastq
 
-
~$ arv-ls 887cd41e9c613463eab2f0d885c6dd96+83
-alice.txt
-bob.txt
-carol.txt
-
- - -Use @-s@ to print file sizes rounded up to the nearest kilobyte: +Use @-s@ to print file sizes, in kilobytes, rounded up: -
~$ arv-ls -s c1bad4b39ca5a924e481008009d94e32+210
-221887 var-GS000016015-ASM.tsv.bz2
+
~$ arv-ls -s ae480c5099b81e17267b7445e35b4bc7+180
+     12258 ./HWI-ST1027_129_D0THKACXX.1_1.fastq
+     12258 ./HWI-ST1027_129_D0THKACXX.1_2.fastq
 
Use @arv-get@ to download the contents of a collection and place it in the directory specified in the second argument (in this example, @.@ for the current directory): -
~$ arv-get c1bad4b39ca5a924e481008009d94e32+210/ .
-~$ ls var-GS000016015-ASM.tsv.bz2
-var-GS000016015-ASM.tsv.bz2
+
~$ arv-get ae480c5099b81e17267b7445e35b4bc7+180/ .
+23 MiB / 23 MiB 100.0%
+~$ ls
+HWI-ST1027_129_D0THKACXX.1_1.fastq  HWI-ST1027_129_D0THKACXX.1_2.fastq
 
You can also download individual files: -
~$ arv-get 887cd41e9c613463eab2f0d885c6dd96+83/alice.txt .
+
~$ arv-get ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_1.fastq .
+11 MiB / 11 MiB 100.0%
 
@@ -65,33 +90,9 @@ If your cluster is "configured to be part of a federation":{{site.baseurl}}/admi If you request a collection by portable data hash, it will first search the home cluster, then search federated clusters. -You may also request a collection by UUID. In this case, it will contact the cluster named in the UUID prefix (in this example, @qr1hi@). +You may also request a collection by UUID. In this case, it will contact the cluster named in the UUID prefix (in this example, @zzzzz@). -
~$ arv-get qr1hi-4zz18-fw6dnjxtkvzdewt/ .
+
~$ arv-get zzzzz-4zz18-fw6dnjxtkvzdewt/ .
 
- -h2(#download-using-workbench). Downloading using Workbench - -You can also download Arvados data collections using the Workbench. - -Visit the Workbench *Dashboard*. Click on *Projects* dropdown menu in the top navigation menu, select your *Home* project. You will see the *Data collections* tab, which lists the collections in this project. - -You can access the contents of a collection by clicking on the * Show* button next to the collection. This will take you to the collection's page. Using this page you can see the collection's contents, download individual files, and set sharing options. - -You can now download the collection files by clicking on the button(s). - -h2(#download-shared-collection). Downloading a shared collection using Workbench - -Collections can be shared to allow downloads by anonymous users. - -To share a collection with anonymous users, visit the collection page using Workbench as described in the above section. Once on this page, click on the Create sharing link button. - -This will create a sharing link for the collection as shown below. You can copy the sharing link in this page and share it with other users. - -!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/shared-collection.png! - -A user with this url can download this collection by simply accessing this url using browser. It will present a downloadable version of the collection as shown below. - -!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/download-shared-collection.png! 
diff --git a/doc/user/tutorials/tutorial-keep-mount-gnu-linux.html.textile.liquid b/doc/user/tutorials/tutorial-keep-mount-gnu-linux.html.textile.liquid index e176021992..060ae2acbe 100644 --- a/doc/user/tutorials/tutorial-keep-mount-gnu-linux.html.textile.liquid +++ b/doc/user/tutorials/tutorial-keep-mount-gnu-linux.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Accessing Keep from GNU/Linux" +title: "Access Keep as a GNU/Linux filesystem" ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -9,17 +9,16 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -This tutoral describes how to access Arvados collections on GNU/Linux using traditional filesystem tools by mounting Keep as a file system using @arv-mount@. +GNU/Linux users can use @arv-mount@ or Gnome to mount Keep as a file system in order to access Arvados collections using traditional filesystem tools. {% include 'tutorial_expectations' %} -h2. Arv-mount +# "*Mounting at the command line with arv-mount*":#arv-mount +# "*Mounting in Gnome File manager*":#gnome -@arv-mount@ provides several features: +h2(#arv-mount). Arv-mount -* You can browse, open and read Keep entries as if they are regular files. -* It is easy for existing tools to access files in Keep. -* Data is streamed on demand. It is not necessary to download an entire file or collection to start processing. +@arv-mount@ provides a file system view of Arvados Keep using File System in Userspace (FUSE). You can browse, open and read Keep entries as if they are regular files, and existing tools can access files in Keep. Data is streamed on demand. It is not necessary to download an entire file or collection to start processing. The default mode permits browsing any collection in Arvados as a subdirectory under the mount directory. 
To avoid having to fetch a potentially large list of all collections, collection directories only come into existence when explicitly accessed by UUID or portable data hash. For instance, a collection may be found by its content hash in the @keep/by_id@ directory. @@ -59,3 +58,11 @@ Not supported: If multiple clients (separate instances of arv-mount or other arvados applications) modify the same file in the same collection within a short time interval, this may result in a conflict. In this case, the most recent commit wins, and the "loser" will be renamed to a conflict file in the form @name~YYYYMMDD-HHMMSS~conflict~@. Please note this feature is in beta testing. In particular, the conflict mechanism is itself currently subject to race conditions with potential for data loss when a collection is being modified simultaneously by multiple clients. This issue will be resolved in future development. + +h2(#gnome). Mounting in Gnome File manager + +As an alternative to @arv-mount@ you can also access the WebDAV mount through the Gnome File manager. + +# Open "Files" +# On the left sidebar, click on "Other Locations" +# At the bottom of the window, enter @davs://collections.ClusterID.example.com/@. When prompted for credentials, enter username "arvados" and a valid Arvados token in the @Password@ field. diff --git a/doc/user/tutorials/tutorial-keep-mount-os-x.html.textile.liquid b/doc/user/tutorials/tutorial-keep-mount-os-x.html.textile.liquid index 9397d61e05..911b8808eb 100644 --- a/doc/user/tutorials/tutorial-keep-mount-os-x.html.textile.liquid +++ b/doc/user/tutorials/tutorial-keep-mount-os-x.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Accessing Keep from OS X" +title: "Access Keep from macOS Finder" ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -9,16 +9,16 @@ Copyright (C) The Arvados Authors. All rights reserved. 
SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -OS X users can browse Keep read-only via WebDAV. Specific collections can also be accessed read-write via WebDAV. +Users of macOS can browse Keep read-only via WebDAV. Specific collections can also be accessed read-write via WebDAV. -h3. Browsing Keep (read-only) +h3. Browsing Keep in Finder (read-only) -In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/@ in popup dialog. When prompted for credentials, put a valid Arvados token in the @Password@ field and anything in the Name field (it will be ignored by Arvados). +In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/@ in popup dialog. When prompted for credentials, enter username "arvados" and paste a valid Arvados token for the @Password@ field. This mount is read-only. Write support for the @/users/@ directory is planned for a future release. h3. Accessing a specific collection in Keep (read-write) -In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/@ in popup dialog. When prompted for credentials, put a valid Arvados token in the @Password@ field and anything in the Name field (it will be ignored by Arvados). +In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/c=your-collection-uuid@ in popup dialog. When prompted for credentials, put a valid Arvados token in the @Password@ field and anything in the Name field (it will be ignored by Arvados). This collection is now accessible read/write. 
diff --git a/doc/user/tutorials/tutorial-keep-mount-windows.html.textile.liquid b/doc/user/tutorials/tutorial-keep-mount-windows.html.textile.liquid index 29b28fff9c..a40a997ba1 100644 --- a/doc/user/tutorials/tutorial-keep-mount-windows.html.textile.liquid +++ b/doc/user/tutorials/tutorial-keep-mount-windows.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Accessing Keep from Windows" +title: "Access Keep from Windows File Explorer" ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -11,7 +11,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0 Windows users can browse Keep read-only via WebDAV. Specific collections can also be accessed read-write via WebDAV. -h3. Browsing Keep (read-only) +h3. Browsing Keep in File Explorer (read-only) Use the 'Map network drive' functionality, and enter @https://collections.ClusterID.example.com/@ in the Folder field. When prompted for credentials, you can fill in an arbitrary string for @Username@, it is ignored by Arvados. Windows will not accept an empty @Username@. Put a valid Arvados token in the @Password@ field. diff --git a/doc/user/tutorials/tutorial-keep.html.textile.liquid b/doc/user/tutorials/tutorial-keep.html.textile.liquid index ec7086db96..21efc475c5 100644 --- a/doc/user/tutorials/tutorial-keep.html.textile.liquid +++ b/doc/user/tutorials/tutorial-keep.html.textile.liquid @@ -9,13 +9,44 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -Arvados Data collections can be uploaded using either the @arv-put@ command line tool or using Workbench. +Arvados Data collections can be uploaded using either Workbench or the @arv-put@ command line tool. -# "*Upload using command line tool*":#upload-using-command # "*Upload using Workbench*":#upload-using-workbench +# "*Creating projects*":#creating-projects +# "*Upload using command line tool*":#upload-using-command + +h2(#upload-using-workbench). 
Upload using Workbench + +To upload using Workbench, visit the Workbench *Dashboard*. Click on *Projects* dropdown menu in the top navigation menu and select your *Home* project or any other project of your choosing. You will see the *Data collections* tab for this project, which lists the collections in this project. + +To upload files into a new collection, click on *Add data* dropdown menu and select *Upload files from my computer*. + +!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-using-workbench.png! + +
This will create a new empty collection in your chosen project and will take you to the *Upload* tab for that collection. + +!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-tab-in-new-collection.png! + +Click on the *Browse...* button and select the files you would like to upload. Selected files will be added to a list of files to be uploaded. After you are done selecting files to upload, click on the * Start* button to start upload. This will start uploading files to Arvados and Workbench will show you the progress bar. When upload is completed, you will see an indication to that effect. + +!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/files-uploaded.png! + +*Note:* If you leave the collection page during the upload, the upload process will be aborted and you will need to upload the files again. + +*Note:* You can also use the Upload tab to add additional files to an existing collection. notextile.
+h2(#creating-projects). Creating projects + +Files are organized into Collections, and Collections are organized by Projects. + +Click on *Projects* *Add a new project* to add a top level project. + +To create a subproject, navigate to the parent project, and click on *Add a subproject*. + +See "Sharing collections":tutorial-keep-get.html#download-shared-collection for information about sharing projects and collections with other users. + h2(#upload-using-command). Upload using command line tool {% include 'tutorial_expectations' %} @@ -25,12 +56,12 @@ To upload a file to Keep using @arv-put@:
~$ arv-put var-GS000016015-ASM.tsv.bz2
 216M / 216M 100.0%
 Collection saved as ...
-qr1hi-4zz18-xxxxxxxxxxxxxxx
+zzzzz-4zz18-xxxxxxxxxxxxxxx
 
-The output value @qr1hi-4zz18-xxxxxxxxxxxxxxx@ is the uuid of the Arvados collection created. +The output value @zzzzz-4zz18-xxxxxxxxxxxxxxx@ is the uuid of the Arvados collection created. Note: The file used in this example is a freely available TSV file containing variant annotations from the "Personal Genome Project (PGP)":http://www.pgp-hms.org participant "hu599905":https://my.pgp-hms.org/profile/hu599905), downloadable "here":https://warehouse.pgp-hms.org/warehouse/f815ec01d5d2f11cb12874ab2ed50daa+234+K@ant/var-GS000016015-ASM.tsv.bz2. Alternatively, you can replace @var-GS000016015-ASM.tsv.bz2@ with the name of any file you have locally, or you could get the TSV file by "downloading it from Keep.":{{site.baseurl}}/user/tutorials/tutorial-keep-get.html @@ -44,7 +75,7 @@ Note: The file used in this example is a freely available TSV file containing va ~$ arv-put tmp 0M / 0M 100.0% Collection saved as ... -qr1hi-4zz18-yyyyyyyyyyyyyyy +zzzzz-4zz18-yyyyyyyyyyyyyyy
@@ -63,23 +94,3 @@ To move the collection to a different project, check the box at the left of the Click on the * Show* button next to the collection's listing on a project page to go to the Workbench page for your collection. On this page, you can see the collection's contents, download individual files, and set sharing options. notextile. - -h2(#upload-using-workbench). Upload using Workbench - -To upload using Workbench, visit the Workbench *Dashboard*. Click on *Projects* dropdown menu in the top navigation menu and select your *Home* project or any other project of your choosing. You will see the *Data collections* tab for this project, which lists the collections in this project. - -To upload files into a new collection, click on *Add data* dropdown menu and select *Upload files from my computer*. - -!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-using-workbench.png! - -
This will create a new empty collection in your chosen project and will take you to the *Upload* tab for that collection. - -!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-tab-in-new-collection.png! - -Click on the *Browse...* button and select the files you would like to upload. Selected files will be added to a list of files to be uploaded. After you are done selecting files to upload, click on the * Start* button to start upload. This will start uploading files to Arvados and Workbench will show you the progress bar. When upload is completed, you will see an indication to that effect. - -!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/files-uploaded.png! - -*Note:* If you leave the collection page during the upload, the upload process will be aborted and you will need to upload the files again. - -*Note:* You can also use the Upload tab to add additional files to an existing collection. diff --git a/doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid b/doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid index 8dcb8e674e..8a08225723 100644 --- a/doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid +++ b/doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid @@ -23,13 +23,15 @@ notextile.
# Start from the *Workbench Dashboard*. You can access the Dashboard by clicking on * Dashboard* in the upper left corner of any Workbench page. # Click on the Run a process... button. This will open a dialog box titled *Choose a pipeline or workflow to run*. -# In the search box, type in *Tutorial bwa mem cwl*. -# Select * Tutorial bwa mem cwl* and click the Next: choose inputs button. This will create a new process in your *Home* project and will open it. You can now supply the inputs for the process. Please note that all required inputs are populated with default values and you can change them if you prefer. -# For example, let's see how to change *"reference" parameter* for this workflow. Click the Choose button beneath the *"reference" parameter* header. This will open a dialog box titled *Choose a dataset for "reference" parameter for cwl-runner in bwa-mem.cwl component*. -# Open the *Home * menu and select *All Projects*. Search for and select * Tutorial chromosome 19 reference*. You will then see a list of files. Select * 19-fasta.bwt* and click the OK button. -# Repeat the previous two steps to set the *"read_p1" parameter for cwl-runner script in bwa-mem.cwl component* and *"read_p2" parameter for cwl-runner script in bwa-mem.cwl component* parameters. -# Click on the Run button. The page updates to show you that the process has been submitted to run on the Arvados cluster. -# After the process starts running, you can track the progress by watching log messages from the component(s). This page refreshes automatically. You will see a complete label when the process completes successfully. +# In the search box, type in *bwa-mem.cwl*. +# Select * bwa-mem.cwl* and click the Next: choose inputs button. This will create a new process in your *Home* project and will open it. You can now supply the inputs for the process. Please note that all required inputs are populated with default values and you can change them if you prefer. 
+# For example, let's see how to set read pair *read_p1* and *read_p2* for this workflow. Click the Choose button beneath the *read_p1* header. This will open a dialog box titled *Choose a file*. +# In the file dialog, click on the *Home * menu and then select *All Projects*. +# Enter *HWI-ST1027* into the search box. You will see one or more collections. Click on * HWI-ST1027_129_D0THKACXX for CWL tutorial*. +# The right hand panel will list two files. Click on the first one ending in "_1" and click the OK button. +# Repeat steps 5-8 to set *read_p2*, this time selecting the second file ending in "_2". +# Scroll to the bottom of the "Inputs" panel and click on the Run button. The page updates to show you that the process has been submitted to run on the Arvados cluster. +# Once the process starts running, you can track the progress by watching log messages from the component(s). This page refreshes automatically. You will see a complete label when the process completes successfully. # Click on the *Output* link to see the results of the process. This will load a new page listing the output files from this process. You'll see the output SAM file from the alignment tool under the *Files* tab. # Click on the download button to the right of the SAM file to download your results. diff --git a/doc/user/tutorials/writing-cwl-workflow.html.textile.liquid b/doc/user/tutorials/writing-cwl-workflow.html.textile.liquid index dd537c46ac..0166b8b525 100644 --- a/doc/user/tutorials/writing-cwl-workflow.html.textile.liquid +++ b/doc/user/tutorials/writing-cwl-workflow.html.textile.liquid @@ -1,7 +1,7 @@ --- layout: default navsection: userguide -title: "Writing a CWL workflow" +title: "Developing workflows with CWL" ... {% comment %} Copyright (C) The Arvados Authors. All rights reserved. @@ -15,7 +15,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0 h2. 
Developing workflows -For an introduction and and detailed documentation about writing CWL, see the "CWL User Guide":https://www.commonwl.org/user_guide and the "CWL Specification":http://commonwl.org/v1.1 . +For an introduction and detailed documentation about writing CWL, see the "CWL User Guide":https://www.commonwl.org/user_guide and the "CWL Specification":http://commonwl.org/v1.2 . See "Writing Portable High-Performance Workflows":{{site.baseurl}}/user/cwl/cwl-style.html and "Arvados CWL Extensions":{{site.baseurl}}/user/cwl/cwl-extensions.html for additional information about using CWL on Arvados. @@ -23,65 +23,6 @@ See "Repositories of CWL Tools and Workflows":https://www.commonwl.org/#Reposito See "Software for working with CWL":https://www.commonwl.org/#Software_for_working_with_CWL for links to software tools to help create CWL documents. -h2. Using Composer - -You can create new workflows in the browser using "Arvados Composer":{{site.baseurl}}/user/composer/composer.html - -h2. Registering a workflow to use in Workbench - -Use @--create-workflow@ to register a CWL workflow with Arvados. This enables you to share workflows with other Arvados users, and run them by clicking the Run a process... button on the Workbench Dashboard and on the command line by UUID. - - 
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --create-workflow bwa-mem.cwl
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to qr1hi-4zz18-7e0hedrmkuyoei3
-2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template qr1hi-p5p6p-rjleou1dwr167v5
-qr1hi-p5p6p-rjleou1dwr167v5
-
-
- -You can provide a partial input file to set default values for the workflow input parameters. You can also use the @--name@ option to set the name of the workflow: - - -
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner --name "My workflow with defaults" --create-workflow bwa-mem.cwl bwa-mem-template.yml
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to qr1hi-4zz18-0f91qkovk4ml18o
-2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template qr1hi-p5p6p-0deqe6nuuyqns2i
-qr1hi-p5p6p-zuniv58hn8d0qd8
-
-
- -h3. Running registered workflows at the command line - -You can run a registered workflow at the command line by its UUID: - - -
~/arvados/doc/user/cwl/bwa-mem$ arvados-cwl-runner qr1hi-p5p6p-zuniv58hn8d0qd8 --help
-/home/peter/work/scripts/venv/bin/arvados-cwl-runner 0d62edcb9d25bf4dcdb20d8872ea7b438e12fc59 1.0.20161209192028, arvados-python-client 0.1.20161212125425, cwltool 1.0.20161207161158
-Resolved 'qr1hi-p5p6p-zuniv58hn8d0qd8' to 'keep:655c6cd07550151b210961ed1d3852cf+57/bwa-mem.cwl'
-usage: qr1hi-p5p6p-zuniv58hn8d0qd8 [-h] [--PL PL] --group_id GROUP_ID
-                                   --read_p1 READ_P1 [--read_p2 READ_P2]
-                                   [--reference REFERENCE] --sample_id
-                                   SAMPLE_ID
-                                   [job_order]
-
-positional arguments:
-  job_order             Job input json file
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --PL PL
-  --group_id GROUP_ID
-  --read_p1 READ_P1     The reads, in fastq format.
-  --read_p2 READ_P2     For mate paired reads, the second file (optional).
-  --reference REFERENCE
-                        The index files produced by `bwa index`
-  --sample_id SAMPLE_ID
-
-
- h2. Using cwltool When developing a workflow, it is often helpful to run it on the local host to avoid the overhead of submitting to the cluster. To execute a workflow only on the local host (without submitting jobs to an Arvados cluster) you can use the @cwltool@ command. Note that when using @cwltool@ you must have the input data accessible on the local file system using either @arv-mount@ or @arv-get@ to fetch the data from Keep. @@ -150,60 +91,3 @@ Final process status is success If you get the error @JavascriptException: Long-running script killed after 20 seconds.@ this may be due to the Dockerized Node.js engine taking too long to start. You may address this by installing Node.js locally (run @apt-get install nodejs@ on Debian or Ubuntu) or by specifying a longer timeout with the @--eval-timeout@ option. For example, run the workflow with @cwltool --eval-timeout=40@ for a 40-second timeout. - -h2. Making workflows directly executable - -You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file: - - -
#!/usr/bin/env cwl-runner
-
-
- - -
~/arvados/doc/user/cwl/bwa-mem$ ./bwa-mem.cwl bwa-mem-input.yml
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
-{
-    "aligned_sam": {
-        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
-        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
-        "class": "File",
-        "size": 30738986
-    }
-}
-
-
- -You can even make an input file directly executable the same way with the following two lines at the top: - - -
#!/usr/bin/env cwl-runner
-cwl:tool: bwa-mem.cwl
-
-
- - -
~/arvados/doc/user/cwl/bwa-mem$ ./bwa-mem-input.yml
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
-{
-    "aligned_sam": {
-        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
-        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
-        "class": "File",
-        "size": 30738986
-    }
-}
-
-
diff --git a/go.mod b/go.mod index 71052882ad..262978d912 100644 --- a/go.mod +++ b/go.mod @@ -4,8 +4,12 @@ go 1.13 require ( github.com/AdRoll/goamz v0.0.0-20170825154802-2731d20f46f4 - github.com/Azure/azure-sdk-for-go v19.1.0+incompatible - github.com/Azure/go-autorest v10.15.2+incompatible + github.com/Azure/azure-sdk-for-go v45.1.0+incompatible + github.com/Azure/go-autorest v14.2.0+incompatible + github.com/Azure/go-autorest/autorest v0.11.3 + github.com/Azure/go-autorest/autorest/azure/auth v0.5.1 + github.com/Azure/go-autorest/autorest/to v0.4.0 + github.com/Azure/go-autorest/autorest/validation v0.3.0 // indirect github.com/Microsoft/go-winio v0.4.5 // indirect github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 // indirect github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect @@ -16,8 +20,6 @@ require ( github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092 github.com/coreos/go-oidc v2.1.0+incompatible github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7 - github.com/dgrijalva/jwt-go v3.1.0+incompatible // indirect - github.com/dimchansky/utfbom v1.0.0 // indirect github.com/dnaeon/go-vcr v1.0.1 // indirect github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible // indirect github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1 @@ -44,7 +46,6 @@ require ( github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect github.com/lib/pq v1.3.0 github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c // indirect - github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 // indirect github.com/msteinert/pam v0.0.0-20190215180659-f29b9f28d6f9 github.com/opencontainers/go-digest v1.0.0-rc1 // indirect github.com/opencontainers/image-spec v1.0.1-0.20171125024018-577479e4dc27 // indirect @@ -57,9 +58,8 @@ require ( github.com/sergi/go-diff v1.0.0 // indirect github.com/sirupsen/logrus v1.4.2 github.com/src-d/gcfg v1.3.0 // indirect - 
github.com/stretchr/testify v1.4.0 // indirect github.com/xanzy/ssh-agent v0.1.0 // indirect - golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 + golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 golang.org/x/net v0.0.0-20200202094626-16171245cfb2 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45 golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd diff --git a/go.sum b/go.sum index ac5c03fc83..85d205112f 100644 --- a/go.sum +++ b/go.sum @@ -2,10 +2,40 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0 h1:ROfEUZz+Gh5pa62DJWXSaonyu3StP6EA6lPEXPI6mCo= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +github.com/Azure/azure-sdk-for-go v0.2.0-beta h1:wYBqYNMWr0WL2lcEZi+dlK9n+N0wJ0Pjs4BKeOnDjfQ= github.com/Azure/azure-sdk-for-go v19.1.0+incompatible h1:ysqLW+tqZjJWOTE74heH/pDRbr4vlN3yV+dqQYgpyxw= github.com/Azure/azure-sdk-for-go v19.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-sdk-for-go v20.2.0+incompatible h1:La3ODnagAOf5ZFUepTfVftvNTdxkq06DNpgi1l0yaM0= +github.com/Azure/azure-sdk-for-go v20.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/azure-sdk-for-go v45.1.0+incompatible h1:kxtaPD8n2z5Za+9e3sKsYG2IX6PG2R6VXtgS7gAbh3A= +github.com/Azure/azure-sdk-for-go v45.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc= +github.com/Azure/go-autorest v1.1.1 h1:4G9tVCqooRY3vDTB2bA1Z01PlSALtnUbji0AfzthUSs= github.com/Azure/go-autorest v10.15.2+incompatible h1:oZpnRzZie83xGV5txbT1aa/7zpCPvURGhV6ThJij2bs= github.com/Azure/go-autorest v10.15.2+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs= +github.com/Azure/go-autorest v14.2.0+incompatible/go.mod 
h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24= +github.com/Azure/go-autorest/autorest v0.11.0/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= +github.com/Azure/go-autorest/autorest v0.11.3 h1:fyYnmYujkIXUgv88D9/Wo2ybE4Zwd/TmQd5sSI5u2Ws= +github.com/Azure/go-autorest/autorest v0.11.3/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw= +github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg= +github.com/Azure/go-autorest/autorest/adal v0.9.2 h1:Aze/GQeAN1RRbGmnUJvUj+tFGBzFdIg3293/A9rbxC4= +github.com/Azure/go-autorest/autorest/adal v0.9.2/go.mod h1:/3SMAM86bP6wC9Ev35peQDUeqFZBMH07vvUOmg4z/fE= +github.com/Azure/go-autorest/autorest/azure/auth v0.5.1 h1:bvUhZciHydpBxBmCheUgxxbSwJy7xcfjkUsjUcqSojc= +github.com/Azure/go-autorest/autorest/azure/auth v0.5.1/go.mod h1:ea90/jvmnAwDrSooLH4sRIehEPtG/EPUXavDh31MnA4= +github.com/Azure/go-autorest/autorest/azure/cli v0.4.0 h1:Ml+UCrnlKD+cJmSzrZ/RDcDw86NjkRUpnFh7V5JUhzU= +github.com/Azure/go-autorest/autorest/azure/cli v0.4.0/go.mod h1:JljT387FplPzBA31vUcvsetLKF3pec5bdAxjVU4kI2s= +github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw= +github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74= +github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k= +github.com/Azure/go-autorest/autorest/to v0.4.0 h1:oXVqrxakqqV1UZdSazDOPOLvOIz+XA683u8EctwboHk= +github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE= +github.com/Azure/go-autorest/autorest/validation v0.3.0 h1:3I9AAI63HfcLtphd9g39ruUwRI+Ca+z/f36KHPFRUss= +github.com/Azure/go-autorest/autorest/validation v0.3.0/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E= +github.com/Azure/go-autorest/logger v0.2.0 
h1:e4RVHVZKC5p6UANLJHkM4OfR1UKZPj8Wt8Pcx+3oqrE= +github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8= +github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo= +github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/Microsoft/go-winio v0.4.5 h1:U2XsGR5dBg1yzwSEJoP2dE2/aAXpmad+CNG2hE9Pd5k= github.com/Microsoft/go-winio v0.4.5/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA= @@ -48,8 +78,12 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.1.0+incompatible h1:FFziAwDQQ2dz1XClWMkwvukur3evtZx7x/wMHKM1i20= github.com/dgrijalva/jwt-go v3.1.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dimchansky/utfbom v1.0.0 h1:fGC2kkf4qOoKqZ4q7iIh+Vef4ubC1c38UDsEyZynZPc= github.com/dimchansky/utfbom v1.0.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8= +github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TRo4= +github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8= github.com/dnaeon/go-vcr v1.0.1 h1:r8L/HqC0Hje5AXMu1ooW8oyQyOFv4GxqpL0nRP7SLLY= github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible h1:PVtvnmmxSMUcT5AY6vG7sCCzRg3eyoW6vQvXtITC60c= @@ -144,6 +178,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0j github.com/matttproud/golang_protobuf_extensions 
v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 h1:eQox4Rh4ewJF+mqYPxCkmBAirRnPaHEB26UkNuPyjlk= github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= @@ -207,6 +243,8 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90Pveol golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= diff --git a/lib/boot/supervisor.go b/lib/boot/supervisor.go index e38a4775e8..3f4fb74822 100644 --- a/lib/boot/supervisor.go +++ b/lib/boot/supervisor.go @@ -601,7 +601,7 @@ func (super 
*Supervisor) autofillConfig(cfg *arvados.Config) error { } if len(svc.InternalURLs) == 0 { svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{ - arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: arvados.ServiceInstance{}, + {Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: {}, } } } diff --git a/lib/cloud/azure/azure.go b/lib/cloud/azure/azure.go index 6de367aa25..ba8a836dd0 100644 --- a/lib/cloud/azure/azure.go +++ b/lib/cloud/azure/azure.go @@ -8,6 +8,7 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "net/http" "regexp" @@ -18,7 +19,7 @@ import ( "git.arvados.org/arvados.git/lib/cloud" "git.arvados.org/arvados.git/sdk/go/arvados" - "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute" + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network" storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage" "github.com/Azure/azure-sdk-for-go/storage" @@ -35,20 +36,23 @@ import ( var Driver = cloud.DriverFunc(newAzureInstanceSet) type azureInstanceSetConfig struct { - SubscriptionID string - ClientID string - ClientSecret string - TenantID string - CloudEnvironment string - ResourceGroup string - Location string - Network string - NetworkResourceGroup string - Subnet string - StorageAccount string - BlobContainer string - DeleteDanglingResourcesAfter arvados.Duration - AdminUsername string + SubscriptionID string + ClientID string + ClientSecret string + TenantID string + CloudEnvironment string + ResourceGroup string + ImageResourceGroup string + Location string + Network string + NetworkResourceGroup string + Subnet string + StorageAccount string + BlobContainer string + SharedImageGalleryName string + SharedImageGalleryImageVersion string + DeleteDanglingResourcesAfter 
arvados.Duration + AdminUsername string } type containerWrapper interface { @@ -138,6 +142,25 @@ func (cl *interfacesClientImpl) listComplete(ctx context.Context, resourceGroupN return r, wrapAzureError(err) } +type disksClientWrapper interface { + listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error) + delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error) +} + +type disksClientImpl struct { + inner compute.DisksClient +} + +func (cl *disksClientImpl) listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error) { + r, err := cl.inner.ListByResourceGroup(ctx, resourceGroupName) + return r, wrapAzureError(err) +} + +func (cl *disksClientImpl) delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error) { + r, err := cl.inner.Delete(ctx, resourceGroupName, diskName) + return r, wrapAzureError(err) +} + var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`) type azureRateLimitError struct { @@ -196,20 +219,23 @@ func wrapAzureError(err error) error { } type azureInstanceSet struct { - azconfig azureInstanceSetConfig - vmClient virtualMachinesClientWrapper - netClient interfacesClientWrapper - blobcont containerWrapper - azureEnv azure.Environment - interfaces map[string]network.Interface - dispatcherID string - namePrefix string - ctx context.Context - stopFunc context.CancelFunc - stopWg sync.WaitGroup - deleteNIC chan string - deleteBlob chan storage.Blob - logger logrus.FieldLogger + azconfig azureInstanceSetConfig + vmClient virtualMachinesClientWrapper + netClient interfacesClientWrapper + disksClient disksClientWrapper + imageResourceGroup string + blobcont containerWrapper + azureEnv azure.Environment + interfaces map[string]network.Interface + dispatcherID string + namePrefix string + ctx context.Context + stopFunc context.CancelFunc + 
stopWg sync.WaitGroup + deleteNIC chan string + deleteBlob chan storage.Blob + deleteDisk chan compute.Disk + logger logrus.FieldLogger } func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) { @@ -233,6 +259,7 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str az.azconfig = azcfg vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID) netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID) + disksClient := compute.NewDisksClient(az.azconfig.SubscriptionID) storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID) az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnvironment) @@ -253,26 +280,38 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str vmClient.Authorizer = authorizer netClient.Authorizer = authorizer + disksClient.Authorizer = authorizer storageAcctClient.Authorizer = authorizer az.vmClient = &virtualMachinesClientImpl{vmClient} az.netClient = &interfacesClientImpl{netClient} + az.disksClient = &disksClientImpl{disksClient} - result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount) - if err != nil { - az.logger.WithError(err).Warn("Couldn't get account keys") - return err + az.imageResourceGroup = az.azconfig.ImageResourceGroup + if az.imageResourceGroup == "" { + az.imageResourceGroup = az.azconfig.ResourceGroup } - key1 := *(*result.Keys)[0].Value - client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv) - if err != nil { - az.logger.WithError(err).Warn("Couldn't make client") - return err - } + var client storage.Client + if az.azconfig.StorageAccount != "" && az.azconfig.BlobContainer != "" { + result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount) + if err != nil { + 
az.logger.WithError(err).Warn("Couldn't get account keys") + return err + } - blobsvc := client.GetBlobService() - az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer) + key1 := *(*result.Keys)[0].Value + client, err = storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv) + if err != nil { + az.logger.WithError(err).Warn("Couldn't make client") + return err + } + + blobsvc := client.GetBlobService() + az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer) + } else if az.azconfig.StorageAccount != "" || az.azconfig.BlobContainer != "" { + az.logger.Error("Invalid configuration: StorageAccount and BlobContainer must both be empty or both be set") + } az.dispatcherID = dispatcherID az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID) @@ -288,21 +327,21 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str tk.Stop() return case <-tk.C: - az.manageBlobs() + if az.blobcont != nil { + az.manageBlobs() + } + az.manageDisks() } } }() az.deleteNIC = make(chan string) az.deleteBlob = make(chan storage.Blob) + az.deleteDisk = make(chan compute.Disk) for i := 0; i < 4; i++ { go func() { - for { - nicname, ok := <-az.deleteNIC - if !ok { - return - } + for nicname := range az.deleteNIC { _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, nicname) if delerr != nil { az.logger.WithError(delerr).Warnf("Error deleting %v", nicname) @@ -312,11 +351,7 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str } }() go func() { - for { - blob, ok := <-az.deleteBlob - if !ok { - return - } + for blob := range az.deleteBlob { err := blob.Delete(nil) if err != nil { az.logger.WithError(err).Warnf("Error deleting %v", blob.Name) @@ -325,11 +360,28 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str } } }() + go func() { + for disk := range az.deleteDisk { + _, err := az.disksClient.delete(az.ctx, 
az.imageResourceGroup, *disk.Name) + if err != nil { + az.logger.WithError(err).Warnf("Error deleting disk %+v", *disk.Name) + } else { + az.logger.Printf("Deleted disk %v", *disk.Name) + } + } + }() } return nil } +func (az *azureInstanceSet) cleanupNic(nic network.Interface) { + _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name) + if delerr != nil { + az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create") + } +} + func (az *azureInstanceSet) Create( instanceType arvados.InstanceType, imageID cloud.ImageID, @@ -389,14 +441,55 @@ func (az *azureInstanceSet) Create( return nil, wrapAzureError(err) } - blobname := fmt.Sprintf("%s-os.vhd", name) - instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s", - az.azconfig.StorageAccount, - az.azureEnv.StorageEndpointSuffix, - az.azconfig.BlobContainer, - blobname) - + var blobname string customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n")) + var storageProfile *compute.StorageProfile + + re := regexp.MustCompile(`^http(s?)://`) + if re.MatchString(string(imageID)) { + if az.blobcont == nil { + az.cleanupNic(nic) + return nil, wrapAzureError(errors.New("Invalid configuration: can't configure unmanaged image URL without StorageAccount and BlobContainer")) + } + blobname = fmt.Sprintf("%s-os.vhd", name) + instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s", + az.azconfig.StorageAccount, + az.azureEnv.StorageEndpointSuffix, + az.azconfig.BlobContainer, + blobname) + az.logger.Warn("using deprecated unmanaged image, see https://doc.arvados.org/ to migrate to managed disks") + storageProfile = &compute.StorageProfile{ + OsDisk: &compute.OSDisk{ + OsType: compute.Linux, + Name: to.StringPtr(name + "-os"), + CreateOption: compute.DiskCreateOptionTypesFromImage, + Image: &compute.VirtualHardDisk{ + URI: to.StringPtr(string(imageID)), + }, + Vhd: &compute.VirtualHardDisk{ + URI: &instanceVhd, + }, + }, + } + } else { + id := 
to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/images/" + string(imageID)) + if az.azconfig.SharedImageGalleryName != "" && az.azconfig.SharedImageGalleryImageVersion != "" { + id = to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/galleries/" + az.azconfig.SharedImageGalleryName + "/images/" + string(imageID) + "/versions/" + az.azconfig.SharedImageGalleryImageVersion) + } else if az.azconfig.SharedImageGalleryName != "" || az.azconfig.SharedImageGalleryImageVersion != "" { + az.cleanupNic(nic) + return nil, wrapAzureError(errors.New("Invalid configuration: SharedImageGalleryName and SharedImageGalleryImageVersion must both be set or both be empty")) + } + storageProfile = &compute.StorageProfile{ + ImageReference: &compute.ImageReference{ + ID: id, + }, + OsDisk: &compute.OSDisk{ + OsType: compute.Linux, + Name: to.StringPtr(name + "-os"), + CreateOption: compute.DiskCreateOptionTypesFromImage, + }, + } + } vmParameters := compute.VirtualMachine{ Location: &az.azconfig.Location, @@ -405,19 +498,7 @@ func (az *azureInstanceSet) Create( HardwareProfile: &compute.HardwareProfile{ VMSize: compute.VirtualMachineSizeTypes(instanceType.ProviderType), }, - StorageProfile: &compute.StorageProfile{ - OsDisk: &compute.OSDisk{ - OsType: compute.Linux, - Name: to.StringPtr(name + "-os"), - CreateOption: compute.FromImage, - Image: &compute.VirtualHardDisk{ - URI: to.StringPtr(string(imageID)), - }, - Vhd: &compute.VirtualHardDisk{ - URI: &instanceVhd, - }, - }, - }, + StorageProfile: storageProfile, NetworkProfile: &compute.NetworkProfile{ NetworkInterfaces: &[]compute.NetworkInterfaceReference{ compute.NetworkInterfaceReference{ @@ -449,15 +530,21 @@ func (az *azureInstanceSet) Create( vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters) if err != nil { - _, 
delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil) - if delerr != nil { - az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create") + // Do some cleanup. Otherwise, an unbounded number of new unused nics and + // blobs can pile up during times when VMs can't be created and the + // dispatcher keeps retrying, because the garbage collection in manageBlobs + // and manageNics is only triggered periodically. This is most important + // for nics, because those are subject to a quota. + az.cleanupNic(nic) + + if blobname != "" { + _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil) + if delerr != nil { + az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create") + } } - _, delerr = az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name) - if delerr != nil { - az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create") - } + // Leave cleaning up of managed disks to the garbage collection in manageDisks() return nil, wrapAzureError(err) } @@ -497,7 +584,7 @@ func (az *azureInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, err return instances, nil } -// ManageNics returns a list of Azure network interface resources. +// manageNics returns a list of Azure network interface resources. // Also performs garbage collection of NICs which have "namePrefix", // are not associated with a virtual machine and have a "created-at" // time more than DeleteDanglingResourcesAfter (to prevent racing and @@ -538,7 +625,7 @@ func (az *azureInstanceSet) manageNics() (map[string]network.Interface, error) { return interfaces, nil } -// ManageBlobs garbage collects blobs (VM disk images) in the +// manageBlobs garbage collects blobs (VM disk images) in the // configured storage account container. 
It will delete blobs which // have "namePrefix", are "available" (which means they are not // leased to a VM) and haven't been modified for @@ -573,11 +660,45 @@ func (az *azureInstanceSet) manageBlobs() { } } +// manageDisks garbage collects managed compute disks (VM disk images) in the +// configured resource group. It will delete disks which have "namePrefix", +// are "unattached" (which means they are not leased to a VM) and were created +// more than DeleteDanglingResourcesAfter seconds ago. (Azure provides no +// modification timestamp on managed disks, there is only a creation timestamp) +func (az *azureInstanceSet) manageDisks() { + + re := regexp.MustCompile(`^` + regexp.QuoteMeta(az.namePrefix) + `.*-os$`) + threshold := time.Now().Add(-az.azconfig.DeleteDanglingResourcesAfter.Duration()) + + response, err := az.disksClient.listByResourceGroup(az.ctx, az.imageResourceGroup) + if err != nil { + az.logger.WithError(err).Warn("Error listing disks") + return + } + + for ; response.NotDone(); err = response.Next() { + if err != nil { + az.logger.WithError(err).Warn("Error getting next page of disks") + return + } + for _, d := range response.Values() { + if d.DiskProperties.DiskState == compute.Unattached && + d.Name != nil && re.MatchString(*d.Name) && + d.DiskProperties.TimeCreated.ToTime().Before(threshold) { + + az.logger.Printf("Disk %v is unlocked and was created at %+v, will delete", *d.Name, d.DiskProperties.TimeCreated.ToTime()) + az.deleteDisk <- d + } + } + } +} + func (az *azureInstanceSet) Stop() { az.stopFunc() az.stopWg.Wait() close(az.deleteNIC) close(az.deleteBlob) + close(az.deleteDisk) } type azureInstance struct { diff --git a/lib/cloud/azure/azure_test.go b/lib/cloud/azure/azure_test.go index 94af0b9a26..7b5a34df59 100644 --- a/lib/cloud/azure/azure_test.go +++ b/lib/cloud/azure/azure_test.go @@ -47,7 +47,7 @@ import ( "git.arvados.org/arvados.git/lib/dispatchcloud/test" "git.arvados.org/arvados.git/sdk/go/arvados" 
"git.arvados.org/arvados.git/sdk/go/config" - "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute" + "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute" "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network" "github.com/Azure/azure-sdk-for-go/storage" "github.com/Azure/go-autorest/autorest" @@ -156,6 +156,7 @@ func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error) logger: logrus.StandardLogger(), deleteNIC: make(chan string), deleteBlob: make(chan storage.Blob), + deleteDisk: make(chan compute.Disk), } ap.ctx, ap.stopFunc = context.WithCancel(context.Background()) ap.vmClient = &VirtualMachinesClientStub{} diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index cac0ac61d7..80294afaf3 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -483,6 +483,9 @@ Clusters: # Use of this feature is not recommended, if it can be avoided. ForwardSlashNameSubstitution: "" + # Include "folder objects" in S3 ListObjects responses. + S3FolderObjects: true + # Managed collection properties. At creation time, if the client didn't # provide the listed keys, they will be automatically populated following # one of the following behaviors: @@ -693,8 +696,8 @@ Clusters: Enable: false Users: SAMPLE: - email: alice@example.com - password: xyzzy + Email: alice@example.com + Password: xyzzy # The cluster ID to delegate the user database. When set, # logins on this cluster will be redirected to the login cluster @@ -705,6 +708,11 @@ Clusters: # remain valid before it needs to be revalidated. RemoteTokenRefresh: 5m + # How long a client token created from a login flow will be valid without + # asking the user to re-login. Example values: 60m, 8h. + # Default value zero means tokens don't have expiration. + TokenLifetime: 0s + Git: # Path to git or gitolite-shell executable. 
Each authenticated # request will execute this program with the single argument "http-backend" @@ -959,6 +967,12 @@ Clusters: TimeoutShutdown: 10s # Worker VM image ID. + # (aws) AMI identifier + # (azure) managed disks: the name of the managed disk image + # (azure) shared image gallery: the name of the image definition. Also + # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields. + # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g. + # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd ImageID: "" # An executable file (located on the dispatcher host) to be @@ -1027,7 +1041,16 @@ Clusters: Network: "" Subnet: "" - # (azure) Where to store the VM VHD blobs + # (azure) managed disks: The resource group where the managed disk + # image can be found (if different from ResourceGroup). + ImageResourceGroup: "" + + # (azure) shared image gallery: the name of the gallery + SharedImageGalleryName: "" + # (azure) shared image gallery: the version of the image definition + SharedImageGalleryImageVersion: "" + + # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs StorageAccount: "" BlobContainer: "" @@ -1291,7 +1314,7 @@ Clusters: # a link to the multi-site search page on a "home" Workbench site. # # Example: - # https://workbench.qr1hi.arvadosapi.com/collections/multisite + # https://workbench.zzzzz.arvadosapi.com/collections/multisite MultiSiteSearch: "" # Should workbench allow management of local git repositories? Set to false if diff --git a/lib/config/export.go b/lib/config/export.go index 251415f711..b203dff26a 100644 --- a/lib/config/export.go +++ b/lib/config/export.go @@ -59,10 +59,10 @@ func ExportJSON(w io.Writer, cluster *arvados.Cluster) error { // exists. 
var whitelist = map[string]bool{ // | sort -t'"' -k2,2 - "ClusterID": true, "API": true, "API.AsyncPermissionsUpdateInterval": false, "API.DisabledAPIs": false, + "API.KeepServiceRequestTimeout": false, "API.MaxConcurrentRequests": false, "API.MaxIndexDatabaseRead": false, "API.MaxItemsPerResponse": true, @@ -71,24 +71,29 @@ var whitelist = map[string]bool{ "API.MaxRequestSize": true, "API.RailsSessionSecretToken": false, "API.RequestTimeout": true, - "API.WebsocketClientEventQueue": false, "API.SendTimeout": true, + "API.WebsocketClientEventQueue": false, "API.WebsocketServerEventQueue": false, - "API.KeepServiceRequestTimeout": false, "AuditLogs": false, "AuditLogs.MaxAge": false, "AuditLogs.MaxDeleteBatch": false, "AuditLogs.UnloggedAttributes": false, + "ClusterID": true, "Collections": true, + "Collections.BalanceCollectionBatch": false, + "Collections.BalanceCollectionBuffers": false, + "Collections.BalancePeriod": false, + "Collections.BalanceTimeout": false, + "Collections.BlobDeleteConcurrency": false, + "Collections.BlobMissingReport": false, + "Collections.BlobReplicateConcurrency": false, "Collections.BlobSigning": true, "Collections.BlobSigningKey": false, "Collections.BlobSigningTTL": true, "Collections.BlobTrash": false, - "Collections.BlobTrashLifetime": false, - "Collections.BlobTrashConcurrency": false, "Collections.BlobTrashCheckInterval": false, - "Collections.BlobDeleteConcurrency": false, - "Collections.BlobReplicateConcurrency": false, + "Collections.BlobTrashConcurrency": false, + "Collections.BlobTrashLifetime": false, "Collections.CollectionVersioning": false, "Collections.DefaultReplication": true, "Collections.DefaultTrashLifetime": true, @@ -97,18 +102,14 @@ var whitelist = map[string]bool{ "Collections.ManagedProperties.*": true, "Collections.ManagedProperties.*.*": true, "Collections.PreserveVersionIfIdle": true, + "Collections.S3FolderObjects": true, "Collections.TrashSweepInterval": false, "Collections.TrustAllContent": false, 
"Collections.WebDAVCache": false, - "Collections.BalanceCollectionBatch": false, - "Collections.BalancePeriod": false, - "Collections.BalanceTimeout": false, - "Collections.BlobMissingReport": false, - "Collections.BalanceCollectionBuffers": false, "Containers": true, "Containers.CloudVMs": false, - "Containers.CrunchRunCommand": false, "Containers.CrunchRunArgumentsList": false, + "Containers.CrunchRunCommand": false, "Containers.DefaultKeepCacheRAM": true, "Containers.DispatchPrivateKey": false, "Containers.JobsAPI": true, @@ -155,31 +156,32 @@ var whitelist = map[string]bool{ "Login.OpenIDConnect": true, "Login.OpenIDConnect.ClientID": false, "Login.OpenIDConnect.ClientSecret": false, - "Login.OpenIDConnect.Enable": true, - "Login.OpenIDConnect.Issuer": false, "Login.OpenIDConnect.EmailClaim": false, "Login.OpenIDConnect.EmailVerifiedClaim": false, + "Login.OpenIDConnect.Enable": true, + "Login.OpenIDConnect.Issuer": false, "Login.OpenIDConnect.UsernameClaim": false, "Login.PAM": true, "Login.PAM.DefaultEmailDomain": false, "Login.PAM.Enable": true, "Login.PAM.Service": false, + "Login.RemoteTokenRefresh": true, "Login.SSO": true, "Login.SSO.Enable": true, "Login.SSO.ProviderAppID": false, "Login.SSO.ProviderAppSecret": false, - "Login.RemoteTokenRefresh": true, "Login.Test": true, "Login.Test.Enable": true, "Login.Test.Users": false, + "Login.TokenLifetime": false, "Mail": true, + "Mail.EmailFrom": false, + "Mail.IssueReporterEmailFrom": false, + "Mail.IssueReporterEmailTo": false, "Mail.MailchimpAPIKey": false, "Mail.MailchimpListID": false, "Mail.SendUserSetupNotificationEmail": false, - "Mail.IssueReporterEmailFrom": false, - "Mail.IssueReporterEmailTo": false, "Mail.SupportEmailAddress": true, - "Mail.EmailFrom": false, "ManagementToken": false, "PostgreSQL": false, "RemoteClusters": true, @@ -197,8 +199,8 @@ var whitelist = map[string]bool{ "SystemRootToken": false, "TLS": false, "Users": true, - "Users.AnonymousUserToken": true, 
"Users.AdminNotifierEmailFrom": false, + "Users.AnonymousUserToken": true, "Users.AutoAdminFirstUser": false, "Users.AutoAdminUserWithEmail": false, "Users.AutoSetupNewUsers": false, @@ -235,6 +237,7 @@ var whitelist = map[string]bool{ "Workbench.EnableGettingStartedPopup": true, "Workbench.EnablePublicProjectsPage": true, "Workbench.FileViewersConfigURL": true, + "Workbench.InactivePageHTML": true, "Workbench.LogViewerMaxBytes": true, "Workbench.MultiSiteSearch": true, "Workbench.ProfilingEnabled": true, @@ -246,6 +249,8 @@ var whitelist = map[string]bool{ "Workbench.ShowUserAgreementInline": true, "Workbench.ShowUserNotifications": true, "Workbench.SiteName": true, + "Workbench.SSHHelpHostSuffix": true, + "Workbench.SSHHelpPageHTML": true, "Workbench.Theme": true, "Workbench.UserProfileFormFields": true, "Workbench.UserProfileFormFields.*": true, @@ -254,9 +259,6 @@ var whitelist = map[string]bool{ "Workbench.UserProfileFormMessage": true, "Workbench.VocabularyURL": true, "Workbench.WelcomePageHTML": true, - "Workbench.InactivePageHTML": true, - "Workbench.SSHHelpPageHTML": true, - "Workbench.SSHHelpHostSuffix": true, } func redactUnsafe(m map[string]interface{}, mPrefix, lookupPrefix string) error { diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index e3bd13fd26..57204cf36a 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -489,6 +489,9 @@ Clusters: # Use of this feature is not recommended, if it can be avoided. ForwardSlashNameSubstitution: "" + # Include "folder objects" in S3 ListObjects responses. + S3FolderObjects: true + # Managed collection properties. At creation time, if the client didn't # provide the listed keys, they will be automatically populated following # one of the following behaviors: @@ -699,8 +702,8 @@ Clusters: Enable: false Users: SAMPLE: - email: alice@example.com - password: xyzzy + Email: alice@example.com + Password: xyzzy # The cluster ID to delegate the user database. 
When set, # logins on this cluster will be redirected to the login cluster @@ -711,6 +714,11 @@ Clusters: # remain valid before it needs to be revalidated. RemoteTokenRefresh: 5m + # How long a client token created from a login flow will be valid without + # asking the user to re-login. Example values: 60m, 8h. + # Default value zero means tokens don't have expiration. + TokenLifetime: 0s + Git: # Path to git or gitolite-shell executable. Each authenticated # request will execute this program with the single argument "http-backend" @@ -965,6 +973,12 @@ Clusters: TimeoutShutdown: 10s # Worker VM image ID. + # (aws) AMI identifier + # (azure) managed disks: the name of the managed disk image + # (azure) shared image gallery: the name of the image definition. Also + # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields. + # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g. + # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd ImageID: "" # An executable file (located on the dispatcher host) to be @@ -1033,7 +1047,16 @@ Clusters: Network: "" Subnet: "" - # (azure) Where to store the VM VHD blobs + # (azure) managed disks: The resource group where the managed disk + # image can be found (if different from ResourceGroup). + ImageResourceGroup: "" + + # (azure) shared image gallery: the name of the gallery + SharedImageGalleryName: "" + # (azure) shared image gallery: the version of the image definition + SharedImageGalleryImageVersion: "" + + # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs StorageAccount: "" BlobContainer: "" @@ -1297,7 +1320,7 @@ Clusters: # a link to the multi-site search page on a "home" Workbench site. # # Example: - # https://workbench.qr1hi.arvadosapi.com/collections/multisite + # https://workbench.zzzzz.arvadosapi.com/collections/multisite MultiSiteSearch: "" # Should workbench allow management of local git repositories? 
Set to false if diff --git a/lib/controller/federation/federation_test.go b/lib/controller/federation/federation_test.go index 256afc8e6b..5079b402b7 100644 --- a/lib/controller/federation/federation_test.go +++ b/lib/controller/federation/federation_test.go @@ -38,7 +38,7 @@ func (s *FederationSuite) SetUpTest(c *check.C) { ClusterID: "aaaaa", SystemRootToken: arvadostest.SystemRootToken, RemoteClusters: map[string]arvados.RemoteCluster{ - "aaaaa": arvados.RemoteCluster{ + "aaaaa": { Host: os.Getenv("ARVADOS_API_HOST"), }, }, diff --git a/lib/controller/localdb/login_ldap_test.go b/lib/controller/localdb/login_ldap_test.go index 700d757c27..bce1ecfcf2 100644 --- a/lib/controller/localdb/login_ldap_test.go +++ b/lib/controller/localdb/login_ldap_test.go @@ -64,7 +64,7 @@ func (s *LDAPSuite) SetUpSuite(c *check.C) { return []*godap.LDAPSimpleSearchResultEntry{} } return []*godap.LDAPSimpleSearchResultEntry{ - &godap.LDAPSimpleSearchResultEntry{ + { DN: "cn=" + req.FilterValue + "," + req.BaseDN, Attrs: map[string]interface{}{ "SN": req.FilterValue, diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go index aa5f22a501..42decff31d 100644 --- a/lib/dispatchcloud/dispatcher_test.go +++ b/lib/dispatchcloud/dispatcher_test.go @@ -115,6 +115,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) { return ChooseInstanceType(s.cluster, ctr) }, + Logger: ctxlog.TestLogger(c), } for i := 0; i < 200; i++ { queue.Containers = append(queue.Containers, arvados.Container{ @@ -170,6 +171,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { stubvm.CrunchRunCrashRate = 0.1 } } + s.stubDriver.Bugf = c.Errorf start := time.Now() go s.disp.run() @@ -303,7 +305,7 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) { time.Sleep(time.Millisecond) } c.Assert(len(sr.Items), check.Equals, 1) - c.Check(sr.Items[0].Instance, check.Matches, "stub.*") + 
c.Check(sr.Items[0].Instance, check.Matches, "inst.*") c.Check(sr.Items[0].WorkerState, check.Equals, "booting") c.Check(sr.Items[0].Price, check.Equals, 0.123) c.Check(sr.Items[0].LastContainerUUID, check.Equals, "") diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go index 4447f084a9..dddb974b32 100644 --- a/lib/dispatchcloud/scheduler/run_queue.go +++ b/lib/dispatchcloud/scheduler/run_queue.go @@ -88,6 +88,8 @@ tryrun: // a higher-priority container on the // same instance type. Don't let this // one sneak in ahead of it. + } else if sch.pool.KillContainer(ctr.UUID, "about to lock") { + logger.Info("not restarting yet: crunch-run process from previous attempt has not exited") } else if sch.pool.StartContainer(it, ctr) { // Success. } else { diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go index 32c6b3b24d..992edddfba 100644 --- a/lib/dispatchcloud/scheduler/run_queue_test.go +++ b/lib/dispatchcloud/scheduler/run_queue_test.go @@ -83,8 +83,9 @@ func (p *stubPool) ForgetContainer(uuid string) { func (p *stubPool) KillContainer(uuid, reason string) bool { p.Lock() defer p.Unlock() - delete(p.running, uuid) - return true + defer delete(p.running, uuid) + t, ok := p.running[uuid] + return ok && t.IsZero() } func (p *stubPool) Shutdown(arvados.InstanceType) bool { p.shutdowns++ diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go index 116ca76431..fc683505f9 100644 --- a/lib/dispatchcloud/scheduler/sync.go +++ b/lib/dispatchcloud/scheduler/sync.go @@ -109,13 +109,17 @@ func (sch *Scheduler) cancel(uuid string, reason string) { } func (sch *Scheduler) kill(uuid string, reason string) { + if !sch.uuidLock(uuid, "kill") { + return + } + defer sch.uuidUnlock(uuid) sch.pool.KillContainer(uuid, reason) sch.pool.ForgetContainer(uuid) } func (sch *Scheduler) requeue(ent container.QueueEnt, reason string) { uuid := ent.Container.UUID - if 
!sch.uuidLock(uuid, "cancel") { + if !sch.uuidLock(uuid, "requeue") { return } defer sch.uuidUnlock(uuid) diff --git a/lib/dispatchcloud/test/queue.go b/lib/dispatchcloud/test/queue.go index 11d410fb1b..74b84122f2 100644 --- a/lib/dispatchcloud/test/queue.go +++ b/lib/dispatchcloud/test/queue.go @@ -11,6 +11,7 @@ import ( "git.arvados.org/arvados.git/lib/dispatchcloud/container" "git.arvados.org/arvados.git/sdk/go/arvados" + "github.com/sirupsen/logrus" ) // Queue is a test stub for container.Queue. The caller specifies the @@ -23,6 +24,8 @@ type Queue struct { // must not be nil. ChooseType func(*arvados.Container) (arvados.InstanceType, error) + Logger logrus.FieldLogger + entries map[string]container.QueueEnt updTime time.Time subscribers map[<-chan struct{}]chan struct{} @@ -166,13 +169,36 @@ func (q *Queue) Notify(upd arvados.Container) bool { defer q.mtx.Unlock() for i, ctr := range q.Containers { if ctr.UUID == upd.UUID { - if ctr.State != arvados.ContainerStateComplete && ctr.State != arvados.ContainerStateCancelled { + if allowContainerUpdate[ctr.State][upd.State] { q.Containers[i] = upd return true + } else { + if q.Logger != nil { + q.Logger.WithField("ContainerUUID", ctr.UUID).Infof("test.Queue rejected update from %s to %s", ctr.State, upd.State) + } + return false } - return false } } q.Containers = append(q.Containers, upd) return true } + +var allowContainerUpdate = map[arvados.ContainerState]map[arvados.ContainerState]bool{ + arvados.ContainerStateQueued: map[arvados.ContainerState]bool{ + arvados.ContainerStateQueued: true, + arvados.ContainerStateLocked: true, + arvados.ContainerStateCancelled: true, + }, + arvados.ContainerStateLocked: map[arvados.ContainerState]bool{ + arvados.ContainerStateQueued: true, + arvados.ContainerStateLocked: true, + arvados.ContainerStateRunning: true, + arvados.ContainerStateCancelled: true, + }, + arvados.ContainerStateRunning: map[arvados.ContainerState]bool{ + arvados.ContainerStateRunning: true, + 
arvados.ContainerStateCancelled: true, + arvados.ContainerStateComplete: true, + }, +} diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go index 7a1f423016..f6e06d3f7c 100644 --- a/lib/dispatchcloud/test/stub_driver.go +++ b/lib/dispatchcloud/test/stub_driver.go @@ -34,6 +34,11 @@ type StubDriver struct { // VM's error rate and other behaviors. SetupVM func(*StubVM) + // Bugf, if set, is called if a bug is detected in the caller + // or stub. Typically set to (*check.C)Errorf. If unset, + // logger.Warnf is called instead. + Bugf func(string, ...interface{}) + // StubVM's fake crunch-run uses this Queue to read and update // container state. Queue *Queue @@ -99,6 +104,7 @@ type StubInstanceSet struct { allowCreateCall time.Time allowInstancesCall time.Time + lastInstanceID int } func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, cmd cloud.InitCommand, authKey ssh.PublicKey) (cloud.Instance, error) { @@ -120,9 +126,10 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, if authKey != nil { ak = append([]ssh.PublicKey{authKey}, ak...) 
} + sis.lastInstanceID++ svm := &StubVM{ sis: sis, - id: cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())), + id: cloud.InstanceID(fmt.Sprintf("inst%d,%s", sis.lastInstanceID, it.ProviderType)), tags: copyTags(tags), providerType: it.ProviderType, initCommand: cmd, @@ -263,49 +270,68 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, }) logger.Printf("[test] starting crunch-run stub") go func() { + var ctr arvados.Container + var started, completed bool + defer func() { + logger.Print("[test] exiting crunch-run stub") + svm.Lock() + defer svm.Unlock() + if svm.running[uuid] != pid { + if !completed { + bugf := svm.sis.driver.Bugf + if bugf == nil { + bugf = logger.Warnf + } + bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid]) + } + } else { + delete(svm.running, uuid) + } + if !completed { + logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub") + if started && svm.CrashRunningContainer != nil { + svm.CrashRunningContainer(ctr) + } + } + }() + crashluck := math_rand.Float64() + wantCrash := crashluck < svm.CrunchRunCrashRate + wantCrashEarly := crashluck < svm.CrunchRunCrashRate/2 + ctr, ok := queue.Get(uuid) if !ok { logger.Print("[test] container not in queue") return } - defer func() { - if ctr.State == arvados.ContainerStateRunning && svm.CrashRunningContainer != nil { - svm.CrashRunningContainer(ctr) - } - }() - - if crashluck > svm.CrunchRunCrashRate/2 { - time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond) - ctr.State = arvados.ContainerStateRunning - if !queue.Notify(ctr) { - ctr, _ = queue.Get(uuid) - logger.Print("[test] erroring out because state=Running update was rejected") - return - } - } - time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond) svm.Lock() - defer svm.Unlock() - if svm.running[uuid] != pid { - logger.Print("[test] container was killed") + killed := svm.running[uuid] 
!= pid + svm.Unlock() + if killed || wantCrashEarly { return } - delete(svm.running, uuid) - if crashluck < svm.CrunchRunCrashRate { + ctr.State = arvados.ContainerStateRunning + started = queue.Notify(ctr) + if !started { + ctr, _ = queue.Get(uuid) + logger.Print("[test] erroring out because state=Running update was rejected") + return + } + + if wantCrash { logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub") - } else { - if svm.ExecuteContainer != nil { - ctr.ExitCode = svm.ExecuteContainer(ctr) - } - logger.WithField("ExitCode", ctr.ExitCode).Print("[test] exiting crunch-run stub") - ctr.State = arvados.ContainerStateComplete - go queue.Notify(ctr) + return + } + if svm.ExecuteContainer != nil { + ctr.ExitCode = svm.ExecuteContainer(ctr) } + logger.WithField("ExitCode", ctr.ExitCode).Print("[test] completing container") + ctr.State = arvados.ContainerStateComplete + completed = queue.Notify(ctr) }() return 0 } diff --git a/sdk/cli/arvados-cli.gemspec b/sdk/cli/arvados-cli.gemspec index f60adf5385..4096a2eb15 100644 --- a/sdk/cli/arvados-cli.gemspec +++ b/sdk/cli/arvados-cli.gemspec @@ -46,7 +46,8 @@ Gem::Specification.new do |s| s.add_runtime_dependency 'json', '>= 1.7.7', '<3' s.add_runtime_dependency 'optimist', '~> 3.0' s.add_runtime_dependency 'andand', '~> 1.3', '>= 1.3.3' - s.add_runtime_dependency 'oj', '~> 3.0' + # oj 3.10.9 requires ruby >= 2.4 and arvbox doesn't currently have it because of SSO + s.add_runtime_dependency 'oj', '< 3.10.9' s.add_runtime_dependency 'curb', '~> 0.8' s.add_runtime_dependency 'launchy', '< 2.5' # arvados-google-api-client 0.8.7.2 is incompatible with faraday 0.16.2 diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index e123671d4a..d98ffd18ed 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -121,6 +121,7 @@ type Cluster struct { TrashSweepInterval Duration TrustAllContent bool ForwardSlashNameSubstitution string + S3FolderObjects bool BlobMissingReport string 
BalancePeriod Duration @@ -182,6 +183,7 @@ type Cluster struct { } LoginCluster string RemoteTokenRefresh Duration + TokenLifetime Duration } Mail struct { MailchimpAPIKey string diff --git a/sdk/go/arvados/fs_base.go b/sdk/go/arvados/fs_base.go index d06aba3695..5e57fed3be 100644 --- a/sdk/go/arvados/fs_base.go +++ b/sdk/go/arvados/fs_base.go @@ -31,6 +31,10 @@ var ( ErrPermission = os.ErrPermission ) +type syncer interface { + Sync() error +} + // A File is an *os.File-like interface for reading and writing files // in a FileSystem. type File interface { @@ -299,6 +303,22 @@ func (n *treenode) Readdir() (fi []os.FileInfo, err error) { return } +func (n *treenode) Sync() error { + n.RLock() + defer n.RUnlock() + for _, inode := range n.inodes { + syncer, ok := inode.(syncer) + if !ok { + return ErrInvalidOperation + } + err := syncer.Sync() + if err != nil { + return err + } + } + return nil +} + type fileSystem struct { root inode fsBackend @@ -576,8 +596,11 @@ func (fs *fileSystem) remove(name string, recursive bool) error { } func (fs *fileSystem) Sync() error { - log.Printf("TODO: sync fileSystem") - return ErrInvalidOperation + if syncer, ok := fs.root.(syncer); ok { + return syncer.Sync() + } else { + return ErrInvalidOperation + } } func (fs *fileSystem) Flush(string, bool) error { diff --git a/sdk/go/arvados/fs_collection.go b/sdk/go/arvados/fs_collection.go index 37bd494914..060b57b493 100644 --- a/sdk/go/arvados/fs_collection.go +++ b/sdk/go/arvados/fs_collection.go @@ -121,6 +121,62 @@ func (fs *collectionFileSystem) newNode(name string, perm os.FileMode, modTime t } } +func (fs *collectionFileSystem) Child(name string, replace func(inode) (inode, error)) (inode, error) { + return fs.rootnode().Child(name, replace) +} + +func (fs *collectionFileSystem) FS() FileSystem { + return fs +} + +func (fs *collectionFileSystem) FileInfo() os.FileInfo { + return fs.rootnode().FileInfo() +} + +func (fs *collectionFileSystem) IsDir() bool { + return true +} + +func 
(fs *collectionFileSystem) Lock() { + fs.rootnode().Lock() +} + +func (fs *collectionFileSystem) Unlock() { + fs.rootnode().Unlock() +} + +func (fs *collectionFileSystem) RLock() { + fs.rootnode().RLock() +} + +func (fs *collectionFileSystem) RUnlock() { + fs.rootnode().RUnlock() +} + +func (fs *collectionFileSystem) Parent() inode { + return fs.rootnode().Parent() +} + +func (fs *collectionFileSystem) Read(_ []byte, ptr filenodePtr) (int, filenodePtr, error) { + return 0, ptr, ErrInvalidOperation +} + +func (fs *collectionFileSystem) Write(_ []byte, ptr filenodePtr) (int, filenodePtr, error) { + return 0, ptr, ErrInvalidOperation +} + +func (fs *collectionFileSystem) Readdir() ([]os.FileInfo, error) { + return fs.rootnode().Readdir() +} + +func (fs *collectionFileSystem) SetParent(parent inode, name string) { + fs.rootnode().SetParent(parent, name) +} + +func (fs *collectionFileSystem) Truncate(int64) error { + return ErrInvalidOperation +} + func (fs *collectionFileSystem) Sync() error { if fs.uuid == "" { return nil @@ -512,8 +568,6 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt seg.Truncate(len(cando)) fn.memsize += int64(len(cando)) fn.segments[cur] = seg - cur++ - prev++ } } @@ -1053,9 +1107,9 @@ func (dn *dirnode) loadManifest(txt string) error { // situation might be rare anyway) segIdx, pos = 0, 0 } - for next := int64(0); segIdx < len(segments); segIdx++ { + for ; segIdx < len(segments); segIdx++ { seg := segments[segIdx] - next = pos + int64(seg.Len()) + next := pos + int64(seg.Len()) if next <= offset || seg.Len() == 0 { pos = next continue diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go index f01369a885..59a6a6ba82 100644 --- a/sdk/go/arvados/fs_collection_test.go +++ b/sdk/go/arvados/fs_collection_test.go @@ -7,7 +7,6 @@ package arvados import ( "bytes" "crypto/md5" - "crypto/sha1" "errors" "fmt" "io" @@ -33,6 +32,9 @@ type keepClientStub struct { blocks map[string][]byte 
refreshable map[string]bool onPut func(bufcopy []byte) // called from PutB, before acquiring lock + authToken string // client's auth token (used for signing locators) + sigkey string // blob signing key + sigttl time.Duration // blob signing ttl sync.RWMutex } @@ -49,7 +51,7 @@ func (kcs *keepClientStub) ReadAt(locator string, p []byte, off int) (int, error } func (kcs *keepClientStub) PutB(p []byte) (string, int, error) { - locator := fmt.Sprintf("%x+%d+A12345@abcde", md5.Sum(p), len(p)) + locator := SignLocator(fmt.Sprintf("%x+%d", md5.Sum(p), len(p)), kcs.authToken, time.Now().Add(kcs.sigttl), kcs.sigttl, []byte(kcs.sigkey)) buf := make([]byte, len(p)) copy(buf, p) if kcs.onPut != nil { @@ -61,9 +63,12 @@ func (kcs *keepClientStub) PutB(p []byte) (string, int, error) { return locator, 1, nil } -var localOrRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`) +var reRemoteSignature = regexp.MustCompile(`\+[AR][^+]*`) func (kcs *keepClientStub) LocalLocator(locator string) (string, error) { + if strings.Contains(locator, "+A") { + return locator, nil + } kcs.Lock() defer kcs.Unlock() if strings.Contains(locator, "+R") { @@ -74,8 +79,9 @@ func (kcs *keepClientStub) LocalLocator(locator string) (string, error) { return "", fmt.Errorf("kcs.refreshable[%q]==false", locator) } } - fakeSig := fmt.Sprintf("+A%x@%x", sha1.Sum(nil), time.Now().Add(time.Hour*24*14).Unix()) - return localOrRemoteSignature.ReplaceAllLiteralString(locator, fakeSig), nil + locator = reRemoteSignature.ReplaceAllLiteralString(locator, "") + locator = SignLocator(locator, kcs.authToken, time.Now().Add(kcs.sigttl), kcs.sigttl, []byte(kcs.sigkey)) + return locator, nil } type CollectionFSSuite struct { @@ -92,7 +98,11 @@ func (s *CollectionFSSuite) SetUpTest(c *check.C) { s.kc = &keepClientStub{ blocks: map[string][]byte{ "3858f62230ac3c915f300c664312c63f": []byte("foobar"), - }} + }, + sigkey: fixtureBlobSigningKey, + sigttl: fixtureBlobSigningTTL, + authToken: fixtureActiveToken, + } s.fs, err = 
s.coll.FileSystem(s.client, s.kc) c.Assert(err, check.IsNil) } diff --git a/sdk/go/arvados/fs_deferred.go b/sdk/go/arvados/fs_deferred.go index 439eaec7c2..254b90c812 100644 --- a/sdk/go/arvados/fs_deferred.go +++ b/sdk/go/arvados/fs_deferred.go @@ -32,14 +32,14 @@ func deferredCollectionFS(fs FileSystem, parent inode, coll Collection) inode { log.Printf("BUG: unhandled error: %s", err) return placeholder } - cfs, err := coll.FileSystem(fs, fs) + newfs, err := coll.FileSystem(fs, fs) if err != nil { log.Printf("BUG: unhandled error: %s", err) return placeholder } - root := cfs.rootnode() - root.SetParent(parent, coll.Name) - return root + cfs := newfs.(*collectionFileSystem) + cfs.SetParent(parent, coll.Name) + return cfs }} } @@ -87,6 +87,19 @@ func (dn *deferrednode) Child(name string, replace func(inode) (inode, error)) ( return dn.realinode().Child(name, replace) } +// Sync is a no-op if the real inode hasn't even been created yet. +func (dn *deferrednode) Sync() error { + dn.mtx.Lock() + defer dn.mtx.Unlock() + if !dn.created { + return nil + } else if syncer, ok := dn.wrapped.(syncer); ok { + return syncer.Sync() + } else { + return ErrInvalidOperation + } +} + func (dn *deferrednode) Truncate(size int64) error { return dn.realinode().Truncate(size) } func (dn *deferrednode) SetParent(p inode, name string) { dn.realinode().SetParent(p, name) } func (dn *deferrednode) IsDir() bool { return dn.currentinode().IsDir() } diff --git a/sdk/go/arvados/fs_lookup.go b/sdk/go/arvados/fs_lookup.go index 42322a14a9..56b5953234 100644 --- a/sdk/go/arvados/fs_lookup.go +++ b/sdk/go/arvados/fs_lookup.go @@ -15,7 +15,7 @@ import ( // // See (*customFileSystem)MountUsers for example usage. 
type lookupnode struct { - inode + treenode loadOne func(parent inode, name string) (inode, error) loadAll func(parent inode) ([]inode, error) stale func(time.Time) bool @@ -26,6 +26,20 @@ type lookupnode struct { staleOne map[string]time.Time } +// Sync flushes pending writes for loaded children and, if successful, +// triggers a reload on next lookup. +func (ln *lookupnode) Sync() error { + err := ln.treenode.Sync() + if err != nil { + return err + } + ln.staleLock.Lock() + ln.staleAll = time.Time{} + ln.staleOne = nil + ln.staleLock.Unlock() + return nil +} + func (ln *lookupnode) Readdir() ([]os.FileInfo, error) { ln.staleLock.Lock() defer ln.staleLock.Unlock() @@ -36,7 +50,7 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) { return nil, err } for _, child := range all { - _, err = ln.inode.Child(child.FileInfo().Name(), func(inode) (inode, error) { + _, err = ln.treenode.Child(child.FileInfo().Name(), func(inode) (inode, error) { return child, nil }) if err != nil { @@ -49,25 +63,47 @@ func (ln *lookupnode) Readdir() ([]os.FileInfo, error) { // newer than ln.staleAll. Reclaim memory. ln.staleOne = nil } - return ln.inode.Readdir() + return ln.treenode.Readdir() } +// Child rejects (with ErrInvalidArgument) calls to add/replace +// children, instead calling loadOne when a non-existing child is +// looked up. 
func (ln *lookupnode) Child(name string, replace func(inode) (inode, error)) (inode, error) { ln.staleLock.Lock() defer ln.staleLock.Unlock() checkTime := time.Now() + var existing inode + var err error if ln.stale(ln.staleAll) && ln.stale(ln.staleOne[name]) { - _, err := ln.inode.Child(name, func(inode) (inode, error) { + existing, err = ln.treenode.Child(name, func(inode) (inode, error) { return ln.loadOne(ln, name) }) - if err != nil { - return nil, err + if err == nil && existing != nil { + if ln.staleOne == nil { + ln.staleOne = map[string]time.Time{name: checkTime} + } else { + ln.staleOne[name] = checkTime + } } - if ln.staleOne == nil { - ln.staleOne = map[string]time.Time{name: checkTime} - } else { - ln.staleOne[name] = checkTime + } else { + existing, err = ln.treenode.Child(name, nil) + if err != nil && !os.IsNotExist(err) { + return existing, err + } + } + if replace != nil { + // Let the callback try to delete or replace the + // existing node; if it does, return + // ErrInvalidArgument. + if tryRepl, err := replace(existing); err != nil { + // Propagate error from callback + return existing, err + } else if tryRepl != existing { + return existing, ErrInvalidArgument } } - return ln.inode.Child(name, replace) + // Return original error from ln.treenode.Child() (it might be + // ErrNotExist). + return existing, err } diff --git a/sdk/go/arvados/fs_project.go b/sdk/go/arvados/fs_project.go index c5eb03360a..bf6391a74e 100644 --- a/sdk/go/arvados/fs_project.go +++ b/sdk/go/arvados/fs_project.go @@ -6,7 +6,6 @@ package arvados import ( "log" - "os" "strings" ) @@ -57,7 +56,7 @@ func (fs *customFileSystem) projectsLoadOne(parent inode, uuid, name string) (in // both "/" and the substitution string. 
} if len(contents.Items) == 0 { - return nil, os.ErrNotExist + return nil, nil } coll := contents.Items[0] diff --git a/sdk/go/arvados/fs_project_test.go b/sdk/go/arvados/fs_project_test.go index 61d82c7fa9..86facd681e 100644 --- a/sdk/go/arvados/fs_project_test.go +++ b/sdk/go/arvados/fs_project_test.go @@ -200,6 +200,23 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) { err = wf.Close() c.Check(err, check.IsNil) + err = project.Sync() + c.Check(err, check.IsNil) + _, err = s.fs.Open("/home/A Project/oob/test.txt") + c.Check(err, check.IsNil) + + // Sync again to mark the project dir as stale, so the + // collection gets reloaded from the controller on next + // lookup. + err = project.Sync() + c.Check(err, check.IsNil) + + // Ensure collection was flushed by Sync + var latest Collection + err = s.client.RequestAndDecode(&latest, "GET", "arvados/v1/collections/"+oob.UUID, nil, nil) + c.Check(err, check.IsNil) + c.Check(latest.ManifestText, check.Matches, `.*:test.txt.*\n`) + // Delete test.txt behind s.fs's back by updating the // collection record with an empty ManifestText. 
err = s.client.RequestAndDecode(nil, "PATCH", "arvados/v1/collections/"+oob.UUID, nil, map[string]interface{}{ @@ -210,8 +227,6 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) { }) c.Assert(err, check.IsNil) - err = project.Sync() - c.Check(err, check.IsNil) _, err = s.fs.Open("/home/A Project/oob/test.txt") c.Check(err, check.NotNil) _, err = s.fs.Open("/home/A Project/oob") @@ -221,7 +236,27 @@ func (s *SiteFSSuite) TestProjectUpdatedByOther(c *check.C) { c.Assert(err, check.IsNil) err = project.Sync() - c.Check(err, check.IsNil) + c.Check(err, check.NotNil) // can't update the deleted collection _, err = s.fs.Open("/home/A Project/oob") - c.Check(err, check.NotNil) + c.Check(err, check.IsNil) // parent dir still has old collection -- didn't reload, because Sync failed +} + +func (s *SiteFSSuite) TestProjectUnsupportedOperations(c *check.C) { + s.fs.MountByID("by_id") + s.fs.MountProject("home", "") + + _, err := s.fs.OpenFile("/home/A Project/newfilename", os.O_CREATE|os.O_RDWR, 0) + c.Check(err, check.ErrorMatches, "invalid argument") + + err = s.fs.Mkdir("/home/A Project/newdirname", 0) + c.Check(err, check.ErrorMatches, "invalid argument") + + err = s.fs.Mkdir("/by_id/newdirname", 0) + c.Check(err, check.ErrorMatches, "invalid argument") + + err = s.fs.Mkdir("/by_id/"+fixtureAProjectUUID+"/newdirname", 0) + c.Check(err, check.ErrorMatches, "invalid argument") + + _, err = s.fs.OpenFile("/home/A Project", 0, 0) + c.Check(err, check.IsNil) } diff --git a/sdk/go/arvados/fs_site.go b/sdk/go/arvados/fs_site.go index 7826d335c8..900893aa36 100644 --- a/sdk/go/arvados/fs_site.go +++ b/sdk/go/arvados/fs_site.go @@ -40,7 +40,7 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { thr: newThrottle(concurrentWriters), }, } - root.inode = &treenode{ + root.treenode = treenode{ fs: fs, parent: root, fileinfo: fileinfo{ @@ -54,9 +54,9 @@ func (c *Client) CustomFileSystem(kc keepClient) CustomFileSystem { } func (fs *customFileSystem) 
MountByID(mount string) { - fs.root.inode.Child(mount, func(inode) (inode, error) { + fs.root.treenode.Child(mount, func(inode) (inode, error) { return &vdirnode{ - inode: &treenode{ + treenode: treenode{ fs: fs, parent: fs.root, inodes: make(map[string]inode), @@ -72,18 +72,18 @@ func (fs *customFileSystem) MountByID(mount string) { } func (fs *customFileSystem) MountProject(mount, uuid string) { - fs.root.inode.Child(mount, func(inode) (inode, error) { + fs.root.treenode.Child(mount, func(inode) (inode, error) { return fs.newProjectNode(fs.root, mount, uuid), nil }) } func (fs *customFileSystem) MountUsers(mount string) { - fs.root.inode.Child(mount, func(inode) (inode, error) { + fs.root.treenode.Child(mount, func(inode) (inode, error) { return &lookupnode{ stale: fs.Stale, loadOne: fs.usersLoadOne, loadAll: fs.usersLoadAll, - inode: &treenode{ + treenode: treenode{ fs: fs, parent: fs.root, inodes: make(map[string]inode), @@ -115,10 +115,7 @@ func (c *Client) SiteFileSystem(kc keepClient) CustomFileSystem { } func (fs *customFileSystem) Sync() error { - fs.staleLock.Lock() - defer fs.staleLock.Unlock() - fs.staleThreshold = time.Now() - return nil + return fs.root.Sync() } // Stale returns true if information obtained at time t should be @@ -130,7 +127,7 @@ func (fs *customFileSystem) Stale(t time.Time) bool { } func (fs *customFileSystem) newNode(name string, perm os.FileMode, modTime time.Time) (node inode, err error) { - return nil, ErrInvalidOperation + return nil, ErrInvalidArgument } func (fs *customFileSystem) mountByID(parent inode, id string) inode { @@ -149,13 +146,13 @@ func (fs *customFileSystem) mountCollection(parent inode, id string) inode { if err != nil { return nil } - cfs, err := coll.FileSystem(fs, fs) + newfs, err := coll.FileSystem(fs, fs) if err != nil { return nil } - root := cfs.rootnode() - root.SetParent(parent, id) - return root + cfs := newfs.(*collectionFileSystem) + cfs.SetParent(parent, id) + return cfs } func (fs 
*customFileSystem) newProjectNode(root inode, name, uuid string) inode { @@ -163,7 +160,7 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode stale: fs.Stale, loadOne: func(parent inode, name string) (inode, error) { return fs.projectsLoadOne(parent, uuid, name) }, loadAll: func(parent inode) ([]inode, error) { return fs.projectsLoadAll(parent, uuid) }, - inode: &treenode{ + treenode: treenode{ fs: fs, parent: root, inodes: make(map[string]inode), @@ -176,24 +173,24 @@ func (fs *customFileSystem) newProjectNode(root inode, name, uuid string) inode } } -// vdirnode wraps an inode by ignoring any requests to add/replace -// children, and calling a create() func when a non-existing child is -// looked up. +// vdirnode wraps an inode by rejecting (with ErrInvalidArgument) +// calls that add/replace children directly, instead calling a +// create() func when a non-existing child is looked up. // // create() can return either a new node, which will be added to the // treenode, or nil for ENOENT. 
type vdirnode struct { - inode + treenode create func(parent inode, name string) inode } func (vn *vdirnode) Child(name string, replace func(inode) (inode, error)) (inode, error) { - return vn.inode.Child(name, func(existing inode) (inode, error) { + return vn.treenode.Child(name, func(existing inode) (inode, error) { if existing == nil && vn.create != nil { existing = vn.create(vn, name) if existing != nil { existing.SetParent(vn, name) - vn.inode.(*treenode).fileinfo.modTime = time.Now() + vn.treenode.fileinfo.modTime = time.Now() } } if replace == nil { diff --git a/sdk/go/arvados/fs_site_test.go b/sdk/go/arvados/fs_site_test.go index 80cc03df37..778b12015a 100644 --- a/sdk/go/arvados/fs_site_test.go +++ b/sdk/go/arvados/fs_site_test.go @@ -7,6 +7,7 @@ package arvados import ( "net/http" "os" + "time" check "gopkg.in/check.v1" ) @@ -22,6 +23,8 @@ const ( fixtureFooCollectionPDH = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45" fixtureFooCollection = "zzzzz-4zz18-fy296fx3hot09f7" fixtureNonexistentCollection = "zzzzz-4zz18-totallynotexist" + fixtureBlobSigningKey = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc" + fixtureBlobSigningTTL = 336 * time.Hour ) var _ = check.Suite(&SiteFSSuite{}) @@ -41,7 +44,11 @@ func (s *SiteFSSuite) SetUpTest(c *check.C) { s.kc = &keepClientStub{ blocks: map[string][]byte{ "3858f62230ac3c915f300c664312c63f": []byte("foobar"), - }} + }, + sigkey: fixtureBlobSigningKey, + sigttl: fixtureBlobSigningTTL, + authToken: fixtureActiveToken, + } s.fs = s.client.SiteFileSystem(s.kc) } @@ -98,7 +105,7 @@ func (s *SiteFSSuite) TestByUUIDAndPDH(c *check.C) { c.Check(names, check.DeepEquals, []string{"baz"}) _, err = s.fs.OpenFile("/by_id/"+fixtureNonexistentCollection, os.O_RDWR|os.O_CREATE, 0755) - c.Check(err, check.Equals, ErrInvalidOperation) + c.Check(err, check.Equals, ErrInvalidArgument) err = s.fs.Rename("/by_id/"+fixtureFooCollection, "/by_id/beep") c.Check(err, check.Equals, 
ErrInvalidArgument) err = s.fs.Rename("/by_id/"+fixtureFooCollection+"/foo", "/by_id/beep") diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go index a1801b2145..2604b02b17 100644 --- a/sdk/go/keepclient/keepclient_test.go +++ b/sdk/go/keepclient/keepclient_test.go @@ -535,6 +535,7 @@ func (s *StandaloneSuite) TestGetEmptyBlock(c *C) { defer ks.listener.Close() arv, err := arvadosclient.MakeArvadosClient() + c.Check(err, IsNil) kc, _ := MakeKeepClient(arv) arv.ApiToken = "abc123" kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil) diff --git a/services/api/.gitignore b/services/api/.gitignore index 793e981b50..0962779658 100644 --- a/services/api/.gitignore +++ b/services/api/.gitignore @@ -7,7 +7,6 @@ # Sensitive files and local configuration /config/database.yml -/config/initializers/omniauth.rb /config/application.yml # asset cache @@ -35,4 +34,4 @@ /package-build.version # Debugger history -.byebug_history \ No newline at end of file +.byebug_history diff --git a/services/api/app/controllers/user_sessions_controller.rb b/services/api/app/controllers/user_sessions_controller.rb index 582b98cf2d..8e3c3ac5e3 100644 --- a/services/api/app/controllers/user_sessions_controller.rb +++ b/services/api/app/controllers/user_sessions_controller.rb @@ -147,10 +147,15 @@ class UserSessionsController < ApplicationController find_or_create_by(url_prefix: api_client_url_prefix) end + token_expiration = nil + if Rails.configuration.Login.TokenLifetime > 0 + token_expiration = Time.now + Rails.configuration.Login.TokenLifetime + end @api_client_auth = ApiClientAuthorization. new(user: user, api_client: @api_client, created_by_ip_address: remote_ip, + expires_at: token_expiration, scopes: ["all"]) @api_client_auth.save! 
diff --git a/services/api/app/models/api_client.rb b/services/api/app/models/api_client.rb index 8ed693f820..c6c48a5b6b 100644 --- a/services/api/app/models/api_client.rb +++ b/services/api/app/models/api_client.rb @@ -15,13 +15,16 @@ class ApiClient < ArvadosModel end def is_trusted - norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench1.ExternalURL) || - norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench2.ExternalURL) || - super + (from_trusted_url && Rails.configuration.Login.TokenLifetime == 0) || super end protected + def from_trusted_url + norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench1.ExternalURL) || + norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench2.ExternalURL) + end + def norm url # normalize URL for comparison url = URI(url) diff --git a/services/api/config/application.rb b/services/api/config/application.rb index 369294e8a7..b28ae0e071 100644 --- a/services/api/config/application.rb +++ b/services/api/config/application.rb @@ -16,7 +16,7 @@ require "sprockets/railtie" require "rails/test_unit/railtie" # Skipping the following: # * ActionCable (new in Rails 5.0) as it adds '/cable' routes that we're not using -# * Skip ActiveStorage (new in Rails 5.1) +# * ActiveStorage (new in Rails 5.1) require 'digest' diff --git a/services/api/config/arvados_config.rb b/services/api/config/arvados_config.rb index 035a3972f8..4f831160e9 100644 --- a/services/api/config/arvados_config.rb +++ b/services/api/config/arvados_config.rb @@ -111,6 +111,7 @@ arvcfg.declare_config "Login.SSO.ProviderAppSecret", String, :sso_app_secret arvcfg.declare_config "Login.SSO.ProviderAppID", String, :sso_app_id arvcfg.declare_config "Login.LoginCluster", String arvcfg.declare_config "Login.RemoteTokenRefresh", ActiveSupport::Duration +arvcfg.declare_config "Login.TokenLifetime", ActiveSupport::Duration arvcfg.declare_config "TLS.Insecure", Boolean, :sso_insecure arvcfg.declare_config 
"Services.SSO.ExternalURL", String, :sso_provider_url arvcfg.declare_config "AuditLogs.MaxAge", ActiveSupport::Duration, :max_audit_log_age diff --git a/services/api/lib/tasks/manage_long_lived_tokens.rake b/services/api/lib/tasks/manage_long_lived_tokens.rake new file mode 100644 index 0000000000..7bcf315b04 --- /dev/null +++ b/services/api/lib/tasks/manage_long_lived_tokens.rake @@ -0,0 +1,61 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +# Tasks that can be useful when changing token expiration policies by assigning +# a non-zero value to Login.TokenLifetime config. + +require 'set' +require 'current_api_client' + +namespace :db do + desc "Apply expiration policy on long lived tokens" + task fix_long_lived_tokens: :environment do + if Rails.configuration.Login.TokenLifetime == 0 + puts("No expiration policy set on Login.TokenLifetime.") + else + exp_date = Time.now + Rails.configuration.Login.TokenLifetime + puts("Setting token expiration to: #{exp_date}") + token_count = 0 + ll_tokens.each do |auth| + if (auth.user.uuid =~ /-tpzed-000000000000000/).nil? + CurrentApiClientHelper.act_as_system_user do + auth.update_attributes!(expires_at: exp_date) + end + token_count += 1 + end + end + puts("#{token_count} tokens updated.") + end + end + + desc "Show users with long lived tokens" + task check_long_lived_tokens: :environment do + user_ids = Set.new() + token_count = 0 + ll_tokens.each do |auth| + if (auth.user.uuid =~ /-tpzed-000000000000000/).nil? + user_ids.add(auth.user_id) + token_count += 1 + end + end + + if user_ids.size > 0 + puts("Found #{token_count} long-lived tokens from users:") + user_ids.each do |uid| + u = User.find(uid) + puts("#{u.username},#{u.email},#{u.uuid}") if !u.nil? 
+ end + else + puts("No long-lived tokens found.") + end + end + + def ll_tokens + query = ApiClientAuthorization.where(expires_at: nil) + if Rails.configuration.Login.TokenLifetime > 0 + query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + Rails.configuration.Login.TokenLifetime)) + end + query + end +end diff --git a/services/api/test/functional/user_sessions_controller_test.rb b/services/api/test/functional/user_sessions_controller_test.rb index fc9475692a..cd475dea4d 100644 --- a/services/api/test/functional/user_sessions_controller_test.rb +++ b/services/api/test/functional/user_sessions_controller_test.rb @@ -14,7 +14,6 @@ class UserSessionsControllerTest < ActionController::TestCase assert_nil assigns(:api_client) end - test "send token when user is already logged in" do authorize_with :inactive api_client_page = 'http://client.example.com/home' @@ -26,6 +25,28 @@ class UserSessionsControllerTest < ActionController::TestCase assert_not_nil assigns(:api_client) end + test "login creates token without expiration by default" do + assert_equal Rails.configuration.Login.TokenLifetime, 0 + authorize_with :inactive + api_client_page = 'http://client.example.com/home' + get :login, params: {return_to: api_client_page} + assert_not_nil assigns(:api_client) + assert_nil assigns(:api_client_auth).expires_at + end + + test "login creates token with configured lifetime" do + token_lifetime = 1.hour + Rails.configuration.Login.TokenLifetime = token_lifetime + authorize_with :inactive + api_client_page = 'http://client.example.com/home' + get :login, params: {return_to: api_client_page} + assert_not_nil assigns(:api_client) + api_client_auth = assigns(:api_client_auth) + assert_in_delta(api_client_auth.expires_at, + api_client_auth.updated_at + token_lifetime, + 1.second) + end + test "login with remote param returns a salted token" do authorize_with :inactive api_client_page = 'http://client.example.com/home' diff --git 
a/services/api/test/unit/api_client_test.rb b/services/api/test/unit/api_client_test.rb index df082c27fd..93e4c51abf 100644 --- a/services/api/test/unit/api_client_test.rb +++ b/services/api/test/unit/api_client_test.rb @@ -7,25 +7,32 @@ require 'test_helper' class ApiClientTest < ActiveSupport::TestCase include CurrentApiClient - test "configured workbench is trusted" do - Rails.configuration.Services.Workbench1.ExternalURL = URI("http://wb1.example.com") - Rails.configuration.Services.Workbench2.ExternalURL = URI("https://wb2.example.com:443") + [true, false].each do |token_lifetime_enabled| + test "configured workbench is trusted when token lifetime is#{token_lifetime_enabled ? '': ' not'} enabled" do + Rails.configuration.Login.TokenLifetime = token_lifetime_enabled ? 8.hours : 0 + Rails.configuration.Services.Workbench1.ExternalURL = URI("http://wb1.example.com") + Rails.configuration.Services.Workbench2.ExternalURL = URI("https://wb2.example.com:443") - act_as_system_user do - [["http://wb0.example.com", false], - ["http://wb1.example.com", true], - ["http://wb2.example.com", false], - ["https://wb2.example.com", true], - ["https://wb2.example.com/", true], - ].each do |pfx, result| - a = ApiClient.create(url_prefix: pfx, is_trusted: false) - assert_equal result, a.is_trusted - end + act_as_system_user do + [["http://wb0.example.com", false], + ["http://wb1.example.com", true], + ["http://wb2.example.com", false], + ["https://wb2.example.com", true], + ["https://wb2.example.com/", true], + ].each do |pfx, result| + a = ApiClient.create(url_prefix: pfx, is_trusted: false) + if token_lifetime_enabled + assert_equal false, a.is_trusted, "API client with url prefix '#{pfx}' shouldn't be trusted" + else + assert_equal result, a.is_trusted + end + end - a = ApiClient.create(url_prefix: "http://example.com", is_trusted: true) - a.save! - a.reload - assert a.is_trusted + a = ApiClient.create(url_prefix: "http://example.com", is_trusted: true) + a.save! 
+ a.reload + assert a.is_trusted + end end end end diff --git a/services/arv-git-httpd/gitolite_test.go b/services/arv-git-httpd/gitolite_test.go index 5f3cc608c3..fb0fc0d783 100644 --- a/services/arv-git-httpd/gitolite_test.go +++ b/services/arv-git-httpd/gitolite_test.go @@ -54,7 +54,7 @@ func (s *GitoliteSuite) SetUpTest(c *check.C) { s.cluster, err = cfg.GetCluster("") c.Assert(err, check.Equals, nil) - s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}} + s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: "localhost:0"}: {}} s.cluster.TLS.Insecure = true s.cluster.Git.GitCommand = "/usr/share/gitolite3/gitolite-shell" s.cluster.Git.GitoliteHome = s.gitoliteHome diff --git a/services/arv-web/README b/services/arv-web/README deleted file mode 100644 index eaf7624dc4..0000000000 --- a/services/arv-web/README +++ /dev/null @@ -1,6 +0,0 @@ -arv-web enables you to run a custom web service using the contents of an -Arvados collection. - -See "Using arv-web" in the Arvados user guide: - -http://doc.arvados.org/user/topics/arv-web.html diff --git a/services/arv-web/arv-web.py b/services/arv-web/arv-web.py deleted file mode 100755 index 55b710a754..0000000000 --- a/services/arv-web/arv-web.py +++ /dev/null @@ -1,256 +0,0 @@ -#!/usr/bin/env python -# Copyright (C) The Arvados Authors. All rights reserved. -# -# SPDX-License-Identifier: AGPL-3.0 - -# arv-web enables you to run a custom web service from the contents of an Arvados collection. 
-# -# See http://doc.arvados.org/user/topics/arv-web.html - -import arvados -from arvados.safeapi import ThreadSafeApiCache -import subprocess -from arvados_fuse import Operations, CollectionDirectory -import tempfile -import os -import llfuse -import threading -import Queue -import argparse -import logging -import signal -import sys -import functools - -logger = logging.getLogger('arvados.arv-web') -logger.setLevel(logging.INFO) - -class ArvWeb(object): - def __init__(self, project, docker_image, port): - self.project = project - self.loop = True - self.cid = None - self.prev_docker_image = None - self.mountdir = None - self.collection = None - self.override_docker_image = docker_image - self.port = port - self.evqueue = Queue.Queue() - self.api = ThreadSafeApiCache(arvados.config.settings()) - - if arvados.util.group_uuid_pattern.match(project) is None: - raise arvados.errors.ArgumentError("Project uuid is not valid") - - collections = self.api.collections().list(filters=[["owner_uuid", "=", project]], - limit=1, - order='modified_at desc').execute()['items'] - self.newcollection = collections[0]['uuid'] if collections else None - - self.ws = arvados.events.subscribe(self.api, [["object_uuid", "is_a", "arvados#collection"]], self.on_message) - - def check_docker_running(self): - # It would be less hacky to use "docker events" than poll "docker ps" - # but that would require writing a bigger pile of code. - if self.cid: - ps = subprocess.check_output(["docker", "ps", "--no-trunc=true", "--filter=status=running"]) - for l in ps.splitlines(): - if l.startswith(self.cid): - return True - return False - - # Handle messages from Arvados event bus. 
- def on_message(self, ev): - if 'event_type' in ev: - old_attr = None - if 'old_attributes' in ev['properties'] and ev['properties']['old_attributes']: - old_attr = ev['properties']['old_attributes'] - if self.project not in (ev['properties']['new_attributes']['owner_uuid'], - old_attr['owner_uuid'] if old_attr else None): - return - - et = ev['event_type'] - if ev['event_type'] == 'update': - if ev['properties']['new_attributes']['owner_uuid'] != ev['properties']['old_attributes']['owner_uuid']: - if self.project == ev['properties']['new_attributes']['owner_uuid']: - et = 'add' - else: - et = 'remove' - if ev['properties']['new_attributes']['trash_at'] is not None: - et = 'remove' - - self.evqueue.put((self.project, et, ev['object_uuid'])) - - # Run an arvados_fuse mount under the control of the local process. This lets - # us switch out the contents of the directory without having to unmount and - # remount. - def run_fuse_mount(self): - self.mountdir = tempfile.mkdtemp() - - self.operations = Operations(os.getuid(), os.getgid(), self.api, "utf-8") - self.cdir = CollectionDirectory(llfuse.ROOT_INODE, self.operations.inodes, self.api, 2, self.collection) - self.operations.inodes.add_entry(self.cdir) - - # Initialize the fuse connection - llfuse.init(self.operations, self.mountdir, ['allow_other']) - - t = threading.Thread(None, llfuse.main) - t.start() - - # wait until the driver is finished initializing - self.operations.initlock.wait() - - def mount_collection(self): - if self.newcollection != self.collection: - self.collection = self.newcollection - if not self.mountdir and self.collection: - self.run_fuse_mount() - - if self.mountdir: - with llfuse.lock: - self.cdir.clear() - # Switch the FUSE directory object so that it stores - # the newly selected collection - if self.collection: - logger.info("Mounting %s", self.collection) - else: - logger.info("Mount is empty") - self.cdir.change_collection(self.collection) - - - def stop_docker(self): - if self.cid: - 
logger.info("Stopping Docker container") - subprocess.call(["docker", "stop", self.cid]) - self.cid = None - - def run_docker(self): - try: - if self.collection is None: - self.stop_docker() - return - - docker_image = None - if self.override_docker_image: - docker_image = self.override_docker_image - else: - try: - with llfuse.lock: - if "docker_image" in self.cdir: - docker_image = self.cdir["docker_image"].readfrom(0, 1024).strip() - except IOError as e: - pass - - has_reload = False - try: - with llfuse.lock: - has_reload = "reload" in self.cdir - except IOError as e: - pass - - if docker_image is None: - logger.error("Collection must contain a file 'docker_image' or must specify --image on the command line.") - self.stop_docker() - return - - if docker_image == self.prev_docker_image and self.cid is not None and has_reload: - logger.info("Running container reload command") - subprocess.check_call(["docker", "exec", self.cid, "/mnt/reload"]) - return - - self.stop_docker() - - logger.info("Starting Docker container %s", docker_image) - self.cid = subprocess.check_output(["docker", "run", - "--detach=true", - "--publish=%i:80" % (self.port), - "--volume=%s:/mnt:ro" % self.mountdir, - docker_image]).strip() - - self.prev_docker_image = docker_image - logger.info("Container id %s", self.cid) - - except subprocess.CalledProcessError: - self.cid = None - - def wait_for_events(self): - if not self.cid: - logger.warning("No service running! Will wait for a new collection to appear in the project.") - else: - logger.info("Waiting for events") - - running = True - self.loop = True - while running: - # Main run loop. Wait on project events, signals, or the - # Docker container stopping. - - try: - # Poll the queue with a 1 second timeout, if we have no - # timeout the Python runtime doesn't have a chance to - # process SIGINT or SIGTERM. 
- eq = self.evqueue.get(True, 1) - logger.info("%s %s", eq[1], eq[2]) - self.newcollection = self.collection - if eq[1] in ('add', 'update', 'create'): - self.newcollection = eq[2] - elif eq[1] == 'remove': - collections = self.api.collections().list(filters=[["owner_uuid", "=", self.project]], - limit=1, - order='modified_at desc').execute()['items'] - self.newcollection = collections[0]['uuid'] if collections else None - running = False - except Queue.Empty: - pass - - if self.cid and not self.check_docker_running(): - logger.warning("Service has terminated. Will try to restart.") - self.cid = None - running = False - - - def run(self): - try: - while self.loop: - self.loop = False - self.mount_collection() - try: - self.run_docker() - self.wait_for_events() - except (KeyboardInterrupt): - logger.info("Got keyboard interrupt") - self.ws.close() - self.loop = False - except Exception as e: - logger.exception("Caught fatal exception, shutting down") - self.ws.close() - self.loop = False - finally: - self.stop_docker() - - if self.mountdir: - logger.info("Unmounting") - subprocess.call(["fusermount", "-u", self.mountdir]) - os.rmdir(self.mountdir) - - -def main(argv): - parser = argparse.ArgumentParser() - parser.add_argument('--project-uuid', type=str, required=True, help="Project uuid to watch") - parser.add_argument('--port', type=int, default=8080, help="Host port to listen on (default 8080)") - parser.add_argument('--image', type=str, help="Docker image to run") - - args = parser.parse_args(argv) - - signal.signal(signal.SIGTERM, lambda signal, frame: sys.exit(0)) - - try: - arvweb = ArvWeb(args.project_uuid, args.image, args.port) - arvweb.run() - except arvados.errors.ArgumentError as e: - logger.error(e) - return 1 - - return 0 - -if __name__ == '__main__': - sys.exit(main(sys.argv[1:])) diff --git a/services/arv-web/sample-cgi-app/docker_image b/services/arv-web/sample-cgi-app/docker_image deleted file mode 100644 index 57f344fcd7..0000000000 --- 
a/services/arv-web/sample-cgi-app/docker_image +++ /dev/null @@ -1 +0,0 @@ -arvados/arv-web \ No newline at end of file diff --git a/services/arv-web/sample-cgi-app/public/.htaccess b/services/arv-web/sample-cgi-app/public/.htaccess deleted file mode 100644 index e5145bd37d..0000000000 --- a/services/arv-web/sample-cgi-app/public/.htaccess +++ /dev/null @@ -1,3 +0,0 @@ -Options +ExecCGI -AddHandler cgi-script .cgi -DirectoryIndex index.cgi diff --git a/services/arv-web/sample-cgi-app/public/index.cgi b/services/arv-web/sample-cgi-app/public/index.cgi deleted file mode 100755 index 57bc2a9a01..0000000000 --- a/services/arv-web/sample-cgi-app/public/index.cgi +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/perl - -print "Content-type: text/html\n\n"; -print "Hello world from perl!"; diff --git a/services/arv-web/sample-cgi-app/tmp/.keepkeep b/services/arv-web/sample-cgi-app/tmp/.keepkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/services/arv-web/sample-rack-app/config.ru b/services/arv-web/sample-rack-app/config.ru deleted file mode 100644 index 65f3c7ca36..0000000000 --- a/services/arv-web/sample-rack-app/config.ru +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (C) The Arvados Authors. All rights reserved. 
-# -# SPDX-License-Identifier: AGPL-3.0 - -app = proc do |env| - [200, { "Content-Type" => "text/html" }, ["hello world from ruby"]] -end -run app diff --git a/services/arv-web/sample-rack-app/docker_image b/services/arv-web/sample-rack-app/docker_image deleted file mode 100644 index 57f344fcd7..0000000000 --- a/services/arv-web/sample-rack-app/docker_image +++ /dev/null @@ -1 +0,0 @@ -arvados/arv-web \ No newline at end of file diff --git a/services/arv-web/sample-rack-app/public/.keepkeep b/services/arv-web/sample-rack-app/public/.keepkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/services/arv-web/sample-rack-app/tmp/.keepkeep b/services/arv-web/sample-rack-app/tmp/.keepkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/services/arv-web/sample-static-page/docker_image b/services/arv-web/sample-static-page/docker_image deleted file mode 100644 index 57f344fcd7..0000000000 --- a/services/arv-web/sample-static-page/docker_image +++ /dev/null @@ -1 +0,0 @@ -arvados/arv-web \ No newline at end of file diff --git a/services/arv-web/sample-static-page/public/index.html b/services/arv-web/sample-static-page/public/index.html deleted file mode 100644 index e8608a5ebe..0000000000 --- a/services/arv-web/sample-static-page/public/index.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - arv-web sample - -

Hello world static page

- - diff --git a/services/arv-web/sample-static-page/tmp/.keepkeep b/services/arv-web/sample-static-page/tmp/.keepkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/services/arv-web/sample-wsgi-app/docker_image b/services/arv-web/sample-wsgi-app/docker_image deleted file mode 100644 index 57f344fcd7..0000000000 --- a/services/arv-web/sample-wsgi-app/docker_image +++ /dev/null @@ -1 +0,0 @@ -arvados/arv-web \ No newline at end of file diff --git a/services/arv-web/sample-wsgi-app/passenger_wsgi.py b/services/arv-web/sample-wsgi-app/passenger_wsgi.py deleted file mode 100644 index faec3c23cd..0000000000 --- a/services/arv-web/sample-wsgi-app/passenger_wsgi.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) The Arvados Authors. All rights reserved. -# -# SPDX-License-Identifier: AGPL-3.0 - -def application(environ, start_response): - start_response('200 OK', [('Content-Type', 'text/plain')]) - return [b"hello world from python!\n"] diff --git a/services/arv-web/sample-wsgi-app/public/.keepkeep b/services/arv-web/sample-wsgi-app/public/.keepkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/services/arv-web/sample-wsgi-app/tmp/.keepkeep b/services/arv-web/sample-wsgi-app/tmp/.keepkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go index 643ca4f587..963948cc6b 100644 --- a/services/keep-web/handler.go +++ b/services/keep-web/handler.go @@ -185,10 +185,6 @@ var ( func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { h.setupOnce.Do(h.setup) - remoteAddr := r.RemoteAddr - if xff := r.Header.Get("X-Forwarded-For"); xff != "" { - remoteAddr = xff + "," + remoteAddr - } if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" { r.URL.Scheme = xfp } @@ -227,6 +223,10 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { w.Header().Set("Access-Control-Expose-Headers", "Content-Range") } + if 
h.serveS3(w, r) { + return + } + pathParts := strings.Split(r.URL.Path[1:], "/") var stripParts int @@ -509,6 +509,27 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) { } } +func (h *handler) getClients(reqID, token string) (arv *arvadosclient.ArvadosClient, kc *keepclient.KeepClient, client *arvados.Client, release func(), err error) { + arv = h.clientPool.Get() + if arv == nil { + return nil, nil, nil, nil, err + } + release = func() { h.clientPool.Put(arv) } + arv.ApiToken = token + kc, err = keepclient.MakeKeepClient(arv) + if err != nil { + release() + return + } + kc.RequestID = reqID + client = (&arvados.Client{ + APIHost: arv.ApiServer, + AuthToken: arv.ApiToken, + Insecure: arv.ApiInsecure, + }).WithRequestID(reqID) + return +} + func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []string, credentialsOK, attachment bool) { if len(tokens) == 0 { w.Header().Add("WWW-Authenticate", "Basic realm=\"collections\"") @@ -519,25 +540,13 @@ func (h *handler) serveSiteFS(w http.ResponseWriter, r *http.Request, tokens []s http.Error(w, errReadOnly.Error(), http.StatusMethodNotAllowed) return } - arv := h.clientPool.Get() - if arv == nil { + _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), tokens[0]) + if err != nil { http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError) return } - defer h.clientPool.Put(arv) - arv.ApiToken = tokens[0] + defer release() - kc, err := keepclient.MakeKeepClient(arv) - if err != nil { - http.Error(w, "error setting up keep client: "+err.Error(), http.StatusInternalServerError) - return - } - kc.RequestID = r.Header.Get("X-Request-Id") - client := (&arvados.Client{ - APIHost: arv.ApiServer, - AuthToken: arv.ApiToken, - Insecure: arv.ApiInsecure, - }).WithRequestID(r.Header.Get("X-Request-Id")) fs := client.SiteFileSystem(kc) fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution) f, err := 
fs.Open(r.URL.Path) diff --git a/services/keep-web/main.go b/services/keep-web/main.go index e4028842f0..647eab1653 100644 --- a/services/keep-web/main.go +++ b/services/keep-web/main.go @@ -14,6 +14,7 @@ import ( "git.arvados.org/arvados.git/sdk/go/arvados" "github.com/coreos/go-systemd/daemon" "github.com/ghodss/yaml" + "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus" ) @@ -111,7 +112,7 @@ func main() { os.Setenv("ARVADOS_API_HOST", cfg.cluster.Services.Controller.ExternalURL.Host) srv := &server{Config: cfg} - if err := srv.Start(); err != nil { + if err := srv.Start(logrus.StandardLogger()); err != nil { log.Fatal(err) } if _, err := daemon.SdNotify(false, "READY=1"); err != nil { diff --git a/services/keep-web/s3.go b/services/keep-web/s3.go new file mode 100644 index 0000000000..01bc8b7047 --- /dev/null +++ b/services/keep-web/s3.go @@ -0,0 +1,452 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + +package main + +import ( + "encoding/xml" + "errors" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/ctxlog" + "github.com/AdRoll/goamz/s3" +) + +const s3MaxKeys = 1000 + +// serveS3 handles r and returns true if r is a request from an S3 +// client, otherwise it returns false. 
+func (h *handler) serveS3(w http.ResponseWriter, r *http.Request) bool { + var token string + if auth := r.Header.Get("Authorization"); strings.HasPrefix(auth, "AWS ") { + split := strings.SplitN(auth[4:], ":", 2) + if len(split) < 2 { + w.WriteHeader(http.StatusUnauthorized) + return true + } + token = split[0] + } else if strings.HasPrefix(auth, "AWS4-HMAC-SHA256 ") { + for _, cmpt := range strings.Split(auth[17:], ",") { + cmpt = strings.TrimSpace(cmpt) + split := strings.SplitN(cmpt, "=", 2) + if len(split) == 2 && split[0] == "Credential" { + keyandscope := strings.Split(split[1], "/") + if len(keyandscope[0]) > 0 { + token = keyandscope[0] + break + } + } + } + if token == "" { + w.WriteHeader(http.StatusBadRequest) + fmt.Println(w, "invalid V4 signature") + return true + } + } else { + return false + } + + _, kc, client, release, err := h.getClients(r.Header.Get("X-Request-Id"), token) + if err != nil { + http.Error(w, "Pool failed: "+h.clientPool.Err().Error(), http.StatusInternalServerError) + return true + } + defer release() + + fs := client.SiteFileSystem(kc) + fs.ForwardSlashNameSubstitution(h.Config.cluster.Collections.ForwardSlashNameSubstitution) + + objectNameGiven := strings.Count(strings.TrimSuffix(r.URL.Path, "/"), "/") > 1 + + switch { + case r.Method == http.MethodGet && !objectNameGiven: + // Path is "/{uuid}" or "/{uuid}/", has no object name + if _, ok := r.URL.Query()["versioning"]; ok { + // GetBucketVersioning + w.Header().Set("Content-Type", "application/xml") + io.WriteString(w, xml.Header) + fmt.Fprintln(w, ``) + } else { + // ListObjects + h.s3list(w, r, fs) + } + return true + case r.Method == http.MethodGet || r.Method == http.MethodHead: + fspath := "/by_id" + r.URL.Path + fi, err := fs.Stat(fspath) + if r.Method == "HEAD" && !objectNameGiven { + // HeadBucket + if err == nil && fi.IsDir() { + w.WriteHeader(http.StatusOK) + } else if os.IsNotExist(err) { + w.WriteHeader(http.StatusNotFound) + } else { + http.Error(w, err.Error(), 
http.StatusBadGateway) + } + return true + } + if err == nil && fi.IsDir() && objectNameGiven && strings.HasSuffix(fspath, "/") && h.Config.cluster.Collections.S3FolderObjects { + w.Header().Set("Content-Type", "application/x-directory") + w.WriteHeader(http.StatusOK) + return true + } + if os.IsNotExist(err) || + (err != nil && err.Error() == "not a directory") || + (fi != nil && fi.IsDir()) { + http.Error(w, "not found", http.StatusNotFound) + return true + } + // shallow copy r, and change URL path + r := *r + r.URL.Path = fspath + http.FileServer(fs).ServeHTTP(w, &r) + return true + case r.Method == http.MethodPut: + if !objectNameGiven { + http.Error(w, "missing object name in PUT request", http.StatusBadRequest) + return true + } + fspath := "by_id" + r.URL.Path + var objectIsDir bool + if strings.HasSuffix(fspath, "/") { + if !h.Config.cluster.Collections.S3FolderObjects { + http.Error(w, "invalid object name: trailing slash", http.StatusBadRequest) + return true + } + n, err := r.Body.Read(make([]byte, 1)) + if err != nil && err != io.EOF { + http.Error(w, fmt.Sprintf("error reading request body: %s", err), http.StatusInternalServerError) + return true + } else if n > 0 { + http.Error(w, "cannot create object with trailing '/' char unless content is empty", http.StatusBadRequest) + return true + } else if strings.SplitN(r.Header.Get("Content-Type"), ";", 2)[0] != "application/x-directory" { + http.Error(w, "cannot create object with trailing '/' char unless Content-Type is 'application/x-directory'", http.StatusBadRequest) + return true + } + // Given PUT "foo/bar/", we'll use "foo/bar/." + // in the "ensure parents exist" block below, + // and then we'll be done. + fspath += "." 
+ objectIsDir = true + } + fi, err := fs.Stat(fspath) + if err != nil && err.Error() == "not a directory" { + // requested foo/bar, but foo is a file + http.Error(w, "object name conflicts with existing object", http.StatusBadRequest) + return true + } + if strings.HasSuffix(r.URL.Path, "/") && err == nil && !fi.IsDir() { + // requested foo/bar/, but foo/bar is a file + http.Error(w, "object name conflicts with existing object", http.StatusBadRequest) + return true + } + // create missing parent/intermediate directories, if any + for i, c := range fspath { + if i > 0 && c == '/' { + dir := fspath[:i] + if strings.HasSuffix(dir, "/") { + err = errors.New("invalid object name (consecutive '/' chars)") + http.Error(w, err.Error(), http.StatusBadRequest) + return true + } + err = fs.Mkdir(dir, 0755) + if err == arvados.ErrInvalidArgument { + // Cannot create a directory + // here. + err = fmt.Errorf("mkdir %q failed: %w", dir, err) + http.Error(w, err.Error(), http.StatusBadRequest) + return true + } else if err != nil && !os.IsExist(err) { + err = fmt.Errorf("mkdir %q failed: %w", dir, err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return true + } + } + } + if !objectIsDir { + f, err := fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) + if os.IsNotExist(err) { + f, err = fs.OpenFile(fspath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0644) + } + if err != nil { + err = fmt.Errorf("open %q failed: %w", r.URL.Path, err) + http.Error(w, err.Error(), http.StatusBadRequest) + return true + } + defer f.Close() + _, err = io.Copy(f, r.Body) + if err != nil { + err = fmt.Errorf("write to %q failed: %w", r.URL.Path, err) + http.Error(w, err.Error(), http.StatusBadGateway) + return true + } + err = f.Close() + if err != nil { + err = fmt.Errorf("write to %q failed: close: %w", r.URL.Path, err) + http.Error(w, err.Error(), http.StatusBadGateway) + return true + } + } + err = fs.Sync() + if err != nil { + err = fmt.Errorf("sync failed: %w", err) + 
http.Error(w, err.Error(), http.StatusInternalServerError) + return true + } + w.WriteHeader(http.StatusOK) + return true + case r.Method == http.MethodDelete: + if !objectNameGiven || r.URL.Path == "/" { + http.Error(w, "missing object name in DELETE request", http.StatusBadRequest) + return true + } + fspath := "by_id" + r.URL.Path + if strings.HasSuffix(fspath, "/") { + fspath = strings.TrimSuffix(fspath, "/") + fi, err := fs.Stat(fspath) + if os.IsNotExist(err) { + w.WriteHeader(http.StatusNoContent) + return true + } else if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return true + } else if !fi.IsDir() { + // if "foo" exists and is a file, then + // "foo/" doesn't exist, so we say + // delete was successful. + w.WriteHeader(http.StatusNoContent) + return true + } + } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() { + // if "foo" is a dir, it is visible via S3 + // only as "foo/", not "foo" -- so we leave + // the dir alone and return 204 to indicate + // that "foo" does not exist. + w.WriteHeader(http.StatusNoContent) + return true + } + err = fs.Remove(fspath) + if os.IsNotExist(err) { + w.WriteHeader(http.StatusNoContent) + return true + } + if err != nil { + err = fmt.Errorf("rm failed: %w", err) + http.Error(w, err.Error(), http.StatusBadRequest) + return true + } + err = fs.Sync() + if err != nil { + err = fmt.Errorf("sync failed: %w", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return true + } + w.WriteHeader(http.StatusNoContent) + return true + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return true + } +} + +// Call fn on the given path (directory) and its contents, in +// lexicographic order. +// +// If isRoot==true and path is not a directory, return nil. +// +// If fn returns filepath.SkipDir when called on a directory, don't +// descend into that directory. 
+func walkFS(fs arvados.CustomFileSystem, path string, isRoot bool, fn func(path string, fi os.FileInfo) error) error { + if isRoot { + fi, err := fs.Stat(path) + if os.IsNotExist(err) || (err == nil && !fi.IsDir()) { + return nil + } else if err != nil { + return err + } + err = fn(path, fi) + if err == filepath.SkipDir { + return nil + } else if err != nil { + return err + } + } + f, err := fs.Open(path) + if os.IsNotExist(err) && isRoot { + return nil + } else if err != nil { + return fmt.Errorf("open %q: %w", path, err) + } + defer f.Close() + if path == "/" { + path = "" + } + fis, err := f.Readdir(-1) + if err != nil { + return err + } + sort.Slice(fis, func(i, j int) bool { return fis[i].Name() < fis[j].Name() }) + for _, fi := range fis { + err = fn(path+"/"+fi.Name(), fi) + if err == filepath.SkipDir { + continue + } else if err != nil { + return err + } + if fi.IsDir() { + err = walkFS(fs, path+"/"+fi.Name(), false, fn) + if err != nil { + return err + } + } + } + return nil +} + +var errDone = errors.New("done") + +func (h *handler) s3list(w http.ResponseWriter, r *http.Request, fs arvados.CustomFileSystem) { + var params struct { + bucket string + delimiter string + marker string + maxKeys int + prefix string + } + params.bucket = strings.SplitN(r.URL.Path[1:], "/", 2)[0] + params.delimiter = r.FormValue("delimiter") + params.marker = r.FormValue("marker") + if mk, _ := strconv.ParseInt(r.FormValue("max-keys"), 10, 64); mk > 0 && mk < s3MaxKeys { + params.maxKeys = int(mk) + } else { + params.maxKeys = s3MaxKeys + } + params.prefix = r.FormValue("prefix") + + bucketdir := "by_id/" + params.bucket + // walkpath is the directory (relative to bucketdir) we need + // to walk: the innermost directory that is guaranteed to + // contain all paths that have the requested prefix. 
Examples: + // prefix "foo/bar" => walkpath "foo" + // prefix "foo/bar/" => walkpath "foo/bar" + // prefix "foo" => walkpath "" + // prefix "" => walkpath "" + walkpath := params.prefix + if cut := strings.LastIndex(walkpath, "/"); cut >= 0 { + walkpath = walkpath[:cut] + } else { + walkpath = "" + } + + resp := s3.ListResp{ + Name: strings.SplitN(r.URL.Path[1:], "/", 2)[0], + Prefix: params.prefix, + Delimiter: params.delimiter, + Marker: params.marker, + MaxKeys: params.maxKeys, + } + commonPrefixes := map[string]bool{} + err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error { + if path == bucketdir { + return nil + } + path = path[len(bucketdir)+1:] + filesize := fi.Size() + if fi.IsDir() { + path += "/" + filesize = 0 + } + if len(path) <= len(params.prefix) { + if path > params.prefix[:len(path)] { + // with prefix "foobar", walking "fooz" means we're done + return errDone + } + if path < params.prefix[:len(path)] { + // with prefix "foobar", walking "foobag" is pointless + return filepath.SkipDir + } + if fi.IsDir() && !strings.HasPrefix(params.prefix+"/", path) { + // with prefix "foo/bar", walking "fo" + // is pointless (but walking "foo" or + // "foo/bar" is necessary) + return filepath.SkipDir + } + if len(path) < len(params.prefix) { + // can't skip anything, and this entry + // isn't in the results, so just + // continue descent + return nil + } + } else { + if path[:len(params.prefix)] > params.prefix { + // with prefix "foobar", nothing we + // see after "foozzz" is relevant + return errDone + } + } + if path < params.marker || path < params.prefix { + return nil + } + if fi.IsDir() && !h.Config.cluster.Collections.S3FolderObjects { + // Note we don't add anything to + // commonPrefixes here even if delimiter is + // "/". We descend into the directory, and + // return a commonPrefix only if we end up + // finding a regular file inside it. 
+ return nil + } + if params.delimiter != "" { + idx := strings.Index(path[len(params.prefix):], params.delimiter) + if idx >= 0 { + // with prefix "foobar" and delimiter + // "z", when we hit "foobar/baz", we + // add "/baz" to commonPrefixes and + // stop descending. + commonPrefixes[path[:len(params.prefix)+idx+1]] = true + return filepath.SkipDir + } + } + if len(resp.Contents)+len(commonPrefixes) >= params.maxKeys { + resp.IsTruncated = true + if params.delimiter != "" { + resp.NextMarker = path + } + return errDone + } + resp.Contents = append(resp.Contents, s3.Key{ + Key: path, + LastModified: fi.ModTime().UTC().Format("2006-01-02T15:04:05.999") + "Z", + Size: filesize, + }) + return nil + }) + if err != nil && err != errDone { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + if params.delimiter != "" { + for prefix := range commonPrefixes { + resp.CommonPrefixes = append(resp.CommonPrefixes, prefix) + sort.Strings(resp.CommonPrefixes) + } + } + wrappedResp := struct { + XMLName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"` + s3.ListResp + }{"", resp} + w.Header().Set("Content-Type", "application/xml") + io.WriteString(w, xml.Header) + if err := xml.NewEncoder(w).Encode(wrappedResp); err != nil { + ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response") + } +} diff --git a/services/keep-web/s3_test.go b/services/keep-web/s3_test.go new file mode 100644 index 0000000000..b82f1efd78 --- /dev/null +++ b/services/keep-web/s3_test.go @@ -0,0 +1,584 @@ +// Copyright (C) The Arvados Authors. All rights reserved. 
+// +// SPDX-License-Identifier: AGPL-3.0 + +package main + +import ( + "bytes" + "crypto/rand" + "fmt" + "io/ioutil" + "net/http" + "os" + "strings" + "sync" + "time" + + "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" + "git.arvados.org/arvados.git/sdk/go/arvadostest" + "git.arvados.org/arvados.git/sdk/go/keepclient" + "github.com/AdRoll/goamz/aws" + "github.com/AdRoll/goamz/s3" + check "gopkg.in/check.v1" +) + +type s3stage struct { + arv *arvados.Client + ac *arvadosclient.ArvadosClient + kc *keepclient.KeepClient + proj arvados.Group + projbucket *s3.Bucket + coll arvados.Collection + collbucket *s3.Bucket +} + +func (s *IntegrationSuite) s3setup(c *check.C) s3stage { + var proj arvados.Group + var coll arvados.Collection + arv := arvados.NewClientFromEnv() + arv.AuthToken = arvadostest.ActiveToken + err := arv.RequestAndDecode(&proj, "POST", "arvados/v1/groups", nil, map[string]interface{}{ + "group": map[string]interface{}{ + "group_class": "project", + "name": "keep-web s3 test", + }, + "ensure_unique_name": true, + }) + c.Assert(err, check.IsNil) + err = arv.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{"collection": map[string]interface{}{ + "owner_uuid": proj.UUID, + "name": "keep-web s3 test collection", + "manifest_text": ". 
d41d8cd98f00b204e9800998ecf8427e+0 0:0:emptyfile\n./emptydir d41d8cd98f00b204e9800998ecf8427e+0 0:0:.\n", + }}) + c.Assert(err, check.IsNil) + ac, err := arvadosclient.New(arv) + c.Assert(err, check.IsNil) + kc, err := keepclient.MakeKeepClient(ac) + c.Assert(err, check.IsNil) + fs, err := coll.FileSystem(arv, kc) + c.Assert(err, check.IsNil) + f, err := fs.OpenFile("sailboat.txt", os.O_CREATE|os.O_WRONLY, 0644) + c.Assert(err, check.IsNil) + _, err = f.Write([]byte("⛵\n")) + c.Assert(err, check.IsNil) + err = f.Close() + c.Assert(err, check.IsNil) + err = fs.Sync() + c.Assert(err, check.IsNil) + err = arv.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil) + c.Assert(err, check.IsNil) + + auth := aws.NewAuth(arvadostest.ActiveTokenV2, arvadostest.ActiveTokenV2, "", time.Now().Add(time.Hour)) + region := aws.Region{ + Name: s.testServer.Addr, + S3Endpoint: "http://" + s.testServer.Addr, + } + client := s3.New(*auth, region) + return s3stage{ + arv: arv, + ac: ac, + kc: kc, + proj: proj, + projbucket: &s3.Bucket{ + S3: client, + Name: proj.UUID, + }, + coll: coll, + collbucket: &s3.Bucket{ + S3: client, + Name: coll.UUID, + }, + } +} + +func (stage s3stage) teardown(c *check.C) { + if stage.coll.UUID != "" { + err := stage.arv.RequestAndDecode(&stage.coll, "DELETE", "arvados/v1/collections/"+stage.coll.UUID, nil, nil) + c.Check(err, check.IsNil) + } + if stage.proj.UUID != "" { + err := stage.arv.RequestAndDecode(&stage.proj, "DELETE", "arvados/v1/groups/"+stage.proj.UUID, nil, nil) + c.Check(err, check.IsNil) + } +} + +func (s *IntegrationSuite) TestS3HeadBucket(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { + c.Logf("bucket %s", bucket.Name) + exists, err := bucket.Exists("") + c.Check(err, check.IsNil) + c.Check(exists, check.Equals, true) + } +} + +func (s *IntegrationSuite) TestS3CollectionGetObject(c *check.C) { + stage := s.s3setup(c) + 
defer stage.teardown(c) + s.testS3GetObject(c, stage.collbucket, "") +} +func (s *IntegrationSuite) TestS3ProjectGetObject(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + s.testS3GetObject(c, stage.projbucket, stage.coll.Name+"/") +} +func (s *IntegrationSuite) testS3GetObject(c *check.C, bucket *s3.Bucket, prefix string) { + rdr, err := bucket.GetReader(prefix + "emptyfile") + c.Assert(err, check.IsNil) + buf, err := ioutil.ReadAll(rdr) + c.Check(err, check.IsNil) + c.Check(len(buf), check.Equals, 0) + err = rdr.Close() + c.Check(err, check.IsNil) + + // GetObject + rdr, err = bucket.GetReader(prefix + "missingfile") + c.Check(err, check.ErrorMatches, `404 Not Found`) + + // HeadObject + exists, err := bucket.Exists(prefix + "missingfile") + c.Check(err, check.IsNil) + c.Check(exists, check.Equals, false) + + // GetObject + rdr, err = bucket.GetReader(prefix + "sailboat.txt") + c.Assert(err, check.IsNil) + buf, err = ioutil.ReadAll(rdr) + c.Check(err, check.IsNil) + c.Check(buf, check.DeepEquals, []byte("⛵\n")) + err = rdr.Close() + c.Check(err, check.IsNil) + + // HeadObject + exists, err = bucket.Exists(prefix + "sailboat.txt") + c.Check(err, check.IsNil) + c.Check(exists, check.Equals, true) +} + +func (s *IntegrationSuite) TestS3CollectionPutObjectSuccess(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + s.testS3PutObjectSuccess(c, stage.collbucket, "") +} +func (s *IntegrationSuite) TestS3ProjectPutObjectSuccess(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + s.testS3PutObjectSuccess(c, stage.projbucket, stage.coll.Name+"/") +} +func (s *IntegrationSuite) testS3PutObjectSuccess(c *check.C, bucket *s3.Bucket, prefix string) { + for _, trial := range []struct { + path string + size int + contentType string + }{ + { + path: "newfile", + size: 128000000, + contentType: "application/octet-stream", + }, { + path: "newdir/newfile", + size: 1 << 26, + contentType: "application/octet-stream", + }, { + path: 
"newdir1/newdir2/newfile", + size: 0, + contentType: "application/octet-stream", + }, { + path: "newdir1/newdir2/newdir3/", + size: 0, + contentType: "application/x-directory", + }, + } { + c.Logf("=== %v", trial) + + objname := prefix + trial.path + + _, err := bucket.GetReader(objname) + c.Assert(err, check.ErrorMatches, `404 Not Found`) + + buf := make([]byte, trial.size) + rand.Read(buf) + + err = bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{}) + c.Check(err, check.IsNil) + + rdr, err := bucket.GetReader(objname) + if strings.HasSuffix(trial.path, "/") && !s.testServer.Config.cluster.Collections.S3FolderObjects { + c.Check(err, check.NotNil) + continue + } else if !c.Check(err, check.IsNil) { + continue + } + buf2, err := ioutil.ReadAll(rdr) + c.Check(err, check.IsNil) + c.Check(buf2, check.HasLen, len(buf)) + c.Check(bytes.Equal(buf, buf2), check.Equals, true) + } +} + +func (s *IntegrationSuite) TestS3ProjectPutObjectNotSupported(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + bucket := stage.projbucket + + for _, trial := range []struct { + path string + size int + contentType string + }{ + { + path: "newfile", + size: 1234, + contentType: "application/octet-stream", + }, { + path: "newdir/newfile", + size: 1234, + contentType: "application/octet-stream", + }, { + path: "newdir2/", + size: 0, + contentType: "application/x-directory", + }, + } { + c.Logf("=== %v", trial) + + _, err := bucket.GetReader(trial.path) + c.Assert(err, check.ErrorMatches, `404 Not Found`) + + buf := make([]byte, trial.size) + rand.Read(buf) + + err = bucket.PutReader(trial.path, bytes.NewReader(buf), int64(len(buf)), trial.contentType, s3.Private, s3.Options{}) + c.Check(err, check.ErrorMatches, `400 Bad Request`) + + _, err = bucket.GetReader(trial.path) + c.Assert(err, check.ErrorMatches, `404 Not Found`) + } +} + +func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) { + stage := 
s.s3setup(c) + defer stage.teardown(c) + s.testS3DeleteObject(c, stage.collbucket, "") +} +func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/") +} +func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) { + s.testServer.Config.cluster.Collections.S3FolderObjects = true + for _, trial := range []struct { + path string + }{ + {"/"}, + {"nonexistentfile"}, + {"emptyfile"}, + {"sailboat.txt"}, + {"sailboat.txt/"}, + {"emptydir"}, + {"emptydir/"}, + } { + objname := prefix + trial.path + comment := check.Commentf("objname %q", objname) + + err := bucket.Del(objname) + if trial.path == "/" { + c.Check(err, check.NotNil) + continue + } + c.Check(err, check.IsNil, comment) + _, err = bucket.GetReader(objname) + c.Check(err, check.NotNil, comment) + } +} + +func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + s.testS3PutObjectFailure(c, stage.collbucket, "") +} +func (s *IntegrationSuite) TestS3ProjectPutObjectFailure(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + s.testS3PutObjectFailure(c, stage.projbucket, stage.coll.Name+"/") +} +func (s *IntegrationSuite) testS3PutObjectFailure(c *check.C, bucket *s3.Bucket, prefix string) { + s.testServer.Config.cluster.Collections.S3FolderObjects = false + var wg sync.WaitGroup + for _, trial := range []struct { + path string + }{ + { + path: "emptyfile/newname", // emptyfile exists, see s3setup() + }, { + path: "emptyfile/", // emptyfile exists, see s3setup() + }, { + path: "emptydir", // dir already exists, see s3setup() + }, { + path: "emptydir/", + }, { + path: "emptydir//", + }, { + path: "newdir/", + }, { + path: "newdir//", + }, { + path: "/", + }, { + path: "//", + }, { + path: "foo//bar", + }, { + path: "", + }, + } { + trial := trial + wg.Add(1) + go func() { + defer 
wg.Done() + c.Logf("=== %v", trial) + + objname := prefix + trial.path + + buf := make([]byte, 1234) + rand.Read(buf) + + err := bucket.PutReader(objname, bytes.NewReader(buf), int64(len(buf)), "application/octet-stream", s3.Private, s3.Options{}) + if !c.Check(err, check.ErrorMatches, `400 Bad.*`, check.Commentf("PUT %q should fail", objname)) { + return + } + + if objname != "" && objname != "/" { + _, err = bucket.GetReader(objname) + c.Check(err, check.ErrorMatches, `404 Not Found`, check.Commentf("GET %q should return 404", objname)) + } + }() + } + wg.Wait() +} + +func (stage *s3stage) writeBigDirs(c *check.C, dirs int, filesPerDir int) { + fs, err := stage.coll.FileSystem(stage.arv, stage.kc) + c.Assert(err, check.IsNil) + for d := 0; d < dirs; d++ { + dir := fmt.Sprintf("dir%d", d) + c.Assert(fs.Mkdir(dir, 0755), check.IsNil) + for i := 0; i < filesPerDir; i++ { + f, err := fs.OpenFile(fmt.Sprintf("%s/file%d.txt", dir, i), os.O_CREATE|os.O_WRONLY, 0644) + c.Assert(err, check.IsNil) + c.Assert(f.Close(), check.IsNil) + } + } + c.Assert(fs.Sync(), check.IsNil) +} + +func (s *IntegrationSuite) TestS3GetBucketVersioning(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} { + req, err := http.NewRequest("GET", bucket.URL("/"), nil) + c.Check(err, check.IsNil) + req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none") + req.URL.RawQuery = "versioning" + resp, err := http.DefaultClient.Do(req) + c.Assert(err, check.IsNil) + c.Check(resp.Header.Get("Content-Type"), check.Equals, "application/xml") + buf, err := ioutil.ReadAll(resp.Body) + c.Assert(err, check.IsNil) + c.Check(string(buf), check.Equals, "\n\n") + } +} + +func (s *IntegrationSuite) TestS3CollectionList(c *check.C) { + stage := s.s3setup(c) + defer stage.teardown(c) + + var markers int + for markers, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} { + dirs := 2 + 
filesPerDir := 1001 + stage.writeBigDirs(c, dirs, filesPerDir) + // Total # objects is: + // 2 file entries from s3setup (emptyfile and sailboat.txt) + // +1 fake "directory" marker from s3setup (emptydir) (if enabled) + // +dirs fake "directory" marker from writeBigDirs (dir0/, dir1/) (if enabled) + // +filesPerDir*dirs file entries from writeBigDirs (dir0/file0.txt, etc.) + s.testS3List(c, stage.collbucket, "", 4000, markers+2+(filesPerDir+markers)*dirs) + s.testS3List(c, stage.collbucket, "", 131, markers+2+(filesPerDir+markers)*dirs) + s.testS3List(c, stage.collbucket, "dir0/", 71, filesPerDir+markers) + } +} +func (s *IntegrationSuite) testS3List(c *check.C, bucket *s3.Bucket, prefix string, pageSize, expectFiles int) { + c.Logf("testS3List: prefix=%q pageSize=%d S3FolderObjects=%v", prefix, pageSize, s.testServer.Config.cluster.Collections.S3FolderObjects) + expectPageSize := pageSize + if expectPageSize > 1000 { + expectPageSize = 1000 + } + gotKeys := map[string]s3.Key{} + nextMarker := "" + pages := 0 + for { + resp, err := bucket.List(prefix, "", nextMarker, pageSize) + if !c.Check(err, check.IsNil) { + break + } + c.Check(len(resp.Contents) <= expectPageSize, check.Equals, true) + if pages++; !c.Check(pages <= (expectFiles/expectPageSize)+1, check.Equals, true) { + break + } + for _, key := range resp.Contents { + gotKeys[key.Key] = key + if strings.Contains(key.Key, "sailboat.txt") { + c.Check(key.Size, check.Equals, int64(4)) + } + } + if !resp.IsTruncated { + c.Check(resp.NextMarker, check.Equals, "") + break + } + if !c.Check(resp.NextMarker, check.Not(check.Equals), "") { + break + } + nextMarker = resp.NextMarker + } + c.Check(len(gotKeys), check.Equals, expectFiles) +} + +func (s *IntegrationSuite) TestS3CollectionListRollup(c *check.C) { + for _, s.testServer.Config.cluster.Collections.S3FolderObjects = range []bool{false, true} { + s.testS3CollectionListRollup(c) + } +} + +func (s *IntegrationSuite) testS3CollectionListRollup(c *check.C) { + 
stage := s.s3setup(c) + defer stage.teardown(c) + + dirs := 2 + filesPerDir := 500 + stage.writeBigDirs(c, dirs, filesPerDir) + err := stage.collbucket.PutReader("dingbats", &bytes.Buffer{}, 0, "application/octet-stream", s3.Private, s3.Options{}) + c.Assert(err, check.IsNil) + var allfiles []string + for marker := ""; ; { + resp, err := stage.collbucket.List("", "", marker, 20000) + c.Check(err, check.IsNil) + for _, key := range resp.Contents { + if len(allfiles) == 0 || allfiles[len(allfiles)-1] != key.Key { + allfiles = append(allfiles, key.Key) + } + } + marker = resp.NextMarker + if marker == "" { + break + } + } + markers := 0 + if s.testServer.Config.cluster.Collections.S3FolderObjects { + markers = 1 + } + c.Check(allfiles, check.HasLen, dirs*(filesPerDir+markers)+3+markers) + + gotDirMarker := map[string]bool{} + for _, name := range allfiles { + isDirMarker := strings.HasSuffix(name, "/") + if markers == 0 { + c.Check(isDirMarker, check.Equals, false, check.Commentf("name %q", name)) + } else if isDirMarker { + gotDirMarker[name] = true + } else if i := strings.LastIndex(name, "/"); i >= 0 { + c.Check(gotDirMarker[name[:i+1]], check.Equals, true, check.Commentf("name %q", name)) + gotDirMarker[name[:i+1]] = true // skip redundant complaints about this dir marker + } + } + + for _, trial := range []struct { + prefix string + delimiter string + marker string + }{ + {"", "", ""}, + {"di", "/", ""}, + {"di", "r", ""}, + {"di", "n", ""}, + {"dir0", "/", ""}, + {"dir0/", "/", ""}, + {"dir0/f", "/", ""}, + {"dir0", "", ""}, + {"dir0/", "", ""}, + {"dir0/f", "", ""}, + {"dir0", "/", "dir0/file14.txt"}, // no commonprefixes + {"", "", "dir0/file14.txt"}, // middle page, skip walking dir1 + {"", "", "dir1/file14.txt"}, // middle page, skip walking dir0 + {"", "", "dir1/file498.txt"}, // last page of results + {"dir1/file", "", "dir1/file498.txt"}, // last page of results, with prefix + {"dir1/file", "/", "dir1/file498.txt"}, // last page of results, with prefix + 
delimiter + {"dir1", "Z", "dir1/file498.txt"}, // delimiter "Z" never appears + {"dir2", "/", ""}, // prefix "dir2" does not exist + {"", "/", ""}, + } { + c.Logf("\n\n=== trial %+v markers=%d", trial, markers) + + maxKeys := 20 + resp, err := stage.collbucket.List(trial.prefix, trial.delimiter, trial.marker, maxKeys) + c.Check(err, check.IsNil) + if resp.IsTruncated && trial.delimiter == "" { + // goamz List method fills in the missing + // NextMarker field if resp.IsTruncated, so + // now we can't really tell whether it was + // sent by the server or by goamz. In cases + // where it should be empty but isn't, assume + // it's goamz's fault. + resp.NextMarker = "" + } + + var expectKeys []string + var expectPrefixes []string + var expectNextMarker string + var expectTruncated bool + for _, key := range allfiles { + full := len(expectKeys)+len(expectPrefixes) >= maxKeys + if !strings.HasPrefix(key, trial.prefix) || key < trial.marker { + continue + } else if idx := strings.Index(key[len(trial.prefix):], trial.delimiter); trial.delimiter != "" && idx >= 0 { + prefix := key[:len(trial.prefix)+idx+1] + if len(expectPrefixes) > 0 && expectPrefixes[len(expectPrefixes)-1] == prefix { + // same prefix as previous key + } else if full { + expectNextMarker = key + expectTruncated = true + } else { + expectPrefixes = append(expectPrefixes, prefix) + } + } else if full { + if trial.delimiter != "" { + expectNextMarker = key + } + expectTruncated = true + break + } else { + expectKeys = append(expectKeys, key) + } + } + + var gotKeys []string + for _, key := range resp.Contents { + gotKeys = append(gotKeys, key.Key) + } + var gotPrefixes []string + for _, prefix := range resp.CommonPrefixes { + gotPrefixes = append(gotPrefixes, prefix) + } + commentf := check.Commentf("trial %+v markers=%d", trial, markers) + c.Check(gotKeys, check.DeepEquals, expectKeys, commentf) + c.Check(gotPrefixes, check.DeepEquals, expectPrefixes, commentf) + c.Check(resp.NextMarker, check.Equals, 
expectNextMarker, commentf) + c.Check(resp.IsTruncated, check.Equals, expectTruncated, commentf) + c.Logf("=== trial %+v keys %q prefixes %q nextMarker %q", trial, gotKeys, gotPrefixes, resp.NextMarker) + } +} diff --git a/services/keep-web/server.go b/services/keep-web/server.go index 46dc3d3017..8f623c627d 100644 --- a/services/keep-web/server.go +++ b/services/keep-web/server.go @@ -20,12 +20,12 @@ type server struct { Config *Config } -func (srv *server) Start() error { +func (srv *server) Start(logger *logrus.Logger) error { h := &handler{Config: srv.Config} reg := prometheus.NewRegistry() h.Config.Cache.registry = reg - ctx := ctxlog.Context(context.Background(), logrus.StandardLogger()) - mh := httpserver.Instrument(reg, nil, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h)))) + ctx := ctxlog.Context(context.Background(), logger) + mh := httpserver.Instrument(reg, logger, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h)))) h.MetricsAPI = mh.ServeAPI(h.Config.cluster.ManagementToken, http.NotFoundHandler()) srv.Handler = mh var listen arvados.URL diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go index bca7ff49fa..c37852a128 100644 --- a/services/keep-web/server_test.go +++ b/services/keep-web/server_test.go @@ -442,7 +442,7 @@ func (s *IntegrationSuite) SetUpTest(c *check.C) { cfg.cluster.ManagementToken = arvadostest.ManagementToken cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken s.testServer = &server{Config: cfg} - err = s.testServer.Start() + err = s.testServer.Start(ctxlog.TestLogger(c)) c.Assert(err, check.Equals, nil) } diff --git a/tools/arvbox/bin/arvbox b/tools/arvbox/bin/arvbox index 7d45ba17c1..8f13215bcf 100755 --- a/tools/arvbox/bin/arvbox +++ b/tools/arvbox/bin/arvbox @@ -201,7 +201,8 @@ run() { --publish=9002:9002 --publish=25101:25101 --publish=8001:8001 - --publish=8002:8002" + --publish=8002:8002 + 
--publish=45000-45020:45000-45020" else PUBLIC="" fi diff --git a/tools/compute-images/arvados-images-azure.json b/tools/compute-images/arvados-images-azure.json index f7fc1a07b4..c8db9499cd 100644 --- a/tools/compute-images/arvados-images-azure.json +++ b/tools/compute-images/arvados-images-azure.json @@ -1,6 +1,5 @@ { "variables": { - "storage_account": null, "resource_group": null, "client_id": "{{env `ARM_CLIENT_ID`}}", "client_secret": "{{env `ARM_CLIENT_SECRET`}}", @@ -30,11 +29,8 @@ "subscription_id": "{{user `subscription_id`}}", "tenant_id": "{{user `tenant_id`}}", - "resource_group_name": "{{user `resource_group`}}", - "storage_account": "{{user `storage_account`}}", - - "capture_container_name": "images", - "capture_name_prefix": "{{user `arvados_cluster`}}-compute", + "managed_image_resource_group_name": "{{user `resource_group`}}", + "managed_image_name": "{{user `arvados_cluster`}}-compute-v{{ timestamp }}", "ssh_username": "{{user `ssh_user`}}", "ssh_private_key_file": "{{user `ssh_private_key_file`}}", diff --git a/tools/compute-images/build.sh b/tools/compute-images/build.sh index e8265ae198..030eb410b8 100755 --- a/tools/compute-images/build.sh +++ b/tools/compute-images/build.sh @@ -43,8 +43,6 @@ Options: Azure secrets file which will be sourced from this script --azure-resource-group (default: false, required if building for Azure) Azure resource group - --azure-storage-account (default: false, required if building for Azure) - Azure storage account --azure-location (default: false, required if building for Azure) Azure location, e.g. centralus, eastus, westeurope --azure-sku (default: unset, required if building for Azure, e.g. 
16.04-LTS) @@ -76,7 +74,6 @@ GCP_ACCOUNT_FILE= GCP_ZONE= AZURE_SECRETS_FILE= AZURE_RESOURCE_GROUP= -AZURE_STORAGE_ACCOUNT= AZURE_LOCATION= AZURE_CLOUD_ENVIRONMENT= DEBUG= @@ -86,7 +83,7 @@ AWS_DEFAULT_REGION=us-east-1 PUBLIC_KEY_FILE= PARSEDOPTS=$(getopt --name "$0" --longoptions \ - help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-storage-account:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \ + help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \ -- "" "$@") if [ $? -ne 0 ]; then exit 1 @@ -139,9 +136,6 @@ while [ $# -gt 0 ]; do --azure-resource-group) AZURE_RESOURCE_GROUP="$2"; shift ;; - --azure-storage-account) - AZURE_STORAGE_ACCOUNT="$2"; shift - ;; --azure-location) AZURE_LOCATION="$2"; shift ;; @@ -248,9 +242,6 @@ fi if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP" fi -if [[ "$AZURE_STORAGE_ACCOUNT" != "" ]]; then - EXTRA2+=" -var storage_account=$AZURE_STORAGE_ACCOUNT" -fi if [[ "$AZURE_LOCATION" != "" ]]; then EXTRA2+=" -var location=$AZURE_LOCATION" fi diff --git a/tools/copy-tutorial/copy-tutorial.sh b/tools/copy-tutorial/copy-tutorial.sh new file mode 100755 index 0000000000..bdc75da2e1 --- /dev/null +++ b/tools/copy-tutorial/copy-tutorial.sh @@ -0,0 +1,25 @@ +#!/bin/sh +# Copyright (C) The Arvados Authors. All rights reserved. 
#
# SPDX-License-Identifier: AGPL-3.0

# Copy the Arvados tutorial resources from the public data cluster
# (jutro) into a new "User guide resources" project on the
# destination cluster named by the first argument.

set -e

if test -z "$1" ; then
  echo "$0: Copies Arvados tutorial resources from public data cluster (jutro)"
  echo "Usage: copy-tutorial.sh <dest>"
  echo "<dest> is destination cluster configuration that can be found in ~/.config/arvados"
  # A missing argument is a usage error, so report failure.
  exit 1
fi

echo "Copying from public data cluster (jutro) to $1"

# Export each whitespace-separated word of the destination cluster's
# config file into the environment for the arv command below.
# NOTE(review): presumably each word is a NAME=value setting -- confirm
# against the config file format.
for a in $(cat "$HOME/.config/arvados/$1.conf") ; do export "$a" ; done

project_uuid=$(arv --format=uuid group create --group '{"name":"User guide resources", "group_class": "project"}')

# Bwa-mem workflow
arv-copy --src jutro --dst "$1" --project-uuid="$project_uuid" f141fc27e7cfa7f7b6d208df5e0ee01b+59
arv-copy --src jutro --dst "$1" --project-uuid="$project_uuid" jutro-7fd4e-mkmmq53m1ze6apx

echo "Data copied to \"User guide resources\" at $project_uuid"