source 'https://rubygems.org'
-gem 'rails', '~> 5.0.0'
+gem 'rails', '~> 5.2.0'
gem 'arvados', git: 'https://github.com/arvados/arvados.git', glob: 'sdk/ruby/arvados.gemspec'
gem 'activerecord-nulldb-adapter', git: 'https://github.com/arvados/nulldb'
gem 'mime-types'
gem 'responders', '~> 2.0'
+# Pin sprockets to < 4.0 to avoid issues when upgrading rails to 5.2
+# See: https://github.com/rails/sprockets-rails/issues/443
+gem 'sprockets', '~> 3.0'
+
+# Fast app boot times
+gem 'bootsnap', require: false
+
# Note: keeping this out of the "group :assets" section "may" allow us
# to use Coffeescript for UJS responses. It also prevents a
# warning/problem when running tests: "WARN: tilt autoloading
gem 'therubyracer', :platforms => :ruby
end
-group :development do
+group :development, :test, :performance do
gem 'byebug'
+ # Pinning launchy because 2.5 requires ruby >= 2.4, which arvbox currently
+ # doesn't have because of SSO.
+ gem 'launchy', '~> 2.4.0'
+end
+
+group :development do
gem 'ruby-debug-passenger'
gem 'rack-mini-profiler', require: false
gem 'flamegraph', require: false
end
group :test, :performance do
- gem 'byebug'
gem 'rails-perftest'
gem 'ruby-prof'
gem 'rvm-capistrano'
gem 'less'
gem 'less-rails'
-
-# Wiselinks hasn't been updated for many years and it's using deprecated methods
-# Use our own Wiselinks fork until this PR is accepted:
-# https://github.com/igor-alexandrov/wiselinks/pull/116
-# gem 'wiselinks', git: 'https://github.com/arvados/wiselinks.git', branch: 'rails-5.1-compatibility'
-
gem 'sshkey'
# To use ActiveModel has_secure_password
remote: https://rubygems.org/
specs:
RedCloth (4.3.2)
- actioncable (5.0.7.2)
- actionpack (= 5.0.7.2)
- nio4r (>= 1.2, < 3.0)
- websocket-driver (~> 0.6.1)
- actionmailer (5.0.7.2)
- actionpack (= 5.0.7.2)
- actionview (= 5.0.7.2)
- activejob (= 5.0.7.2)
+ actioncable (5.2.4.3)
+ actionpack (= 5.2.4.3)
+ nio4r (~> 2.0)
+ websocket-driver (>= 0.6.1)
+ actionmailer (5.2.4.3)
+ actionpack (= 5.2.4.3)
+ actionview (= 5.2.4.3)
+ activejob (= 5.2.4.3)
mail (~> 2.5, >= 2.5.4)
rails-dom-testing (~> 2.0)
- actionpack (5.0.7.2)
- actionview (= 5.0.7.2)
- activesupport (= 5.0.7.2)
- rack (~> 2.0)
- rack-test (~> 0.6.3)
+ actionpack (5.2.4.3)
+ actionview (= 5.2.4.3)
+ activesupport (= 5.2.4.3)
+ rack (~> 2.0, >= 2.0.8)
+ rack-test (>= 0.6.3)
rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.0, >= 1.0.2)
- actionview (5.0.7.2)
- activesupport (= 5.0.7.2)
+ actionview (5.2.4.3)
+ activesupport (= 5.2.4.3)
builder (~> 3.1)
- erubis (~> 2.7.0)
+ erubi (~> 1.4)
rails-dom-testing (~> 2.0)
rails-html-sanitizer (~> 1.0, >= 1.0.3)
- activejob (5.0.7.2)
- activesupport (= 5.0.7.2)
+ activejob (5.2.4.3)
+ activesupport (= 5.2.4.3)
globalid (>= 0.3.6)
- activemodel (5.0.7.2)
- activesupport (= 5.0.7.2)
- activerecord (5.0.7.2)
- activemodel (= 5.0.7.2)
- activesupport (= 5.0.7.2)
- arel (~> 7.0)
- activesupport (5.0.7.2)
+ activemodel (5.2.4.3)
+ activesupport (= 5.2.4.3)
+ activerecord (5.2.4.3)
+ activemodel (= 5.2.4.3)
+ activesupport (= 5.2.4.3)
+ arel (>= 9.0)
+ activestorage (5.2.4.3)
+ actionpack (= 5.2.4.3)
+ activerecord (= 5.2.4.3)
+ marcel (~> 0.3.1)
+ activesupport (5.2.4.3)
concurrent-ruby (~> 1.0, >= 1.0.2)
i18n (>= 0.7, < 2)
minitest (~> 5.1)
public_suffix (>= 2.0.2, < 5.0)
andand (1.3.3)
angularjs-rails (1.3.15)
- arel (7.1.4)
- arvados-google-api-client (0.8.7.3)
- activesupport (>= 3.2, < 5.1)
+ arel (9.0.0)
+ arvados-google-api-client (0.8.7.4)
+ activesupport (>= 3.2, < 5.3)
addressable (~> 2.3)
autoparse (~> 0.3)
extlib (~> 0.9)
multi_json (>= 1.0.0)
autoprefixer-rails (9.5.1.1)
execjs
+ bootsnap (1.4.7)
+ msgpack (~> 1.0)
bootstrap-sass (3.4.1)
autoprefixer-rails (>= 5.2.1)
sassc (>= 2.0.0)
railties (>= 3.1)
bootstrap-x-editable-rails (1.5.1.1)
railties (>= 3.0)
- builder (3.2.3)
+ builder (3.2.4)
byebug (11.0.1)
capistrano (2.15.9)
highline
execjs
coffee-script-source (1.12.2)
commonjs (0.2.7)
- concurrent-ruby (1.1.5)
- crass (1.0.5)
+ concurrent-ruby (1.1.6)
+ crass (1.0.6)
deep_merge (1.2.1)
docile (1.3.1)
- erubis (2.7.0)
+ erubi (1.9.0)
execjs (2.7.0)
extlib (0.9.16)
faraday (0.15.4)
railties (>= 4)
request_store (~> 1.0)
logstash-event (1.2.02)
- loofah (2.3.1)
+ loofah (2.6.0)
crass (~> 1.0.2)
nokogiri (>= 1.5.9)
mail (2.7.1)
mini_mime (>= 0.1.1)
+ marcel (0.3.3)
+ mimemagic (~> 0.3.2)
memoist (0.16.2)
metaclass (0.0.4)
- method_source (0.9.2)
+ method_source (1.0.0)
mime-types (3.2.2)
mime-types-data (~> 3.2015)
mime-types-data (3.2019.0331)
- mini_mime (1.0.1)
+ mimemagic (0.3.5)
+ mini_mime (1.0.2)
mini_portile2 (2.4.0)
minitest (5.10.3)
mocha (1.8.0)
metaclass (~> 0.0.1)
morrisjs-rails (0.5.1.2)
railties (> 3.1, < 6)
- multi_json (1.14.1)
+ msgpack (1.3.3)
+ multi_json (1.15.0)
multipart-post (2.1.1)
net-scp (2.0.0)
net-ssh (>= 2.6.5, < 6.0.0)
net-ssh (5.2.0)
net-ssh-gateway (2.0.0)
net-ssh (>= 4.0.0)
- nio4r (2.3.1)
- nokogiri (1.10.8)
+ nio4r (2.5.2)
+ nokogiri (1.10.10)
mini_portile2 (~> 2.4.0)
npm-rails (0.2.1)
rails (>= 3.2)
oj (3.7.12)
- os (1.0.1)
+ os (1.1.1)
passenger (6.0.2)
rack
rake (>= 0.8.1)
cliver (~> 0.3.1)
multi_json (~> 1.0)
websocket-driver (>= 0.2.0)
- public_suffix (4.0.3)
+ public_suffix (4.0.5)
rack (2.2.3)
rack-mini-profiler (1.0.2)
rack (>= 1.2.0)
- rack-test (0.6.3)
- rack (>= 1.0)
- rails (5.0.7.2)
- actioncable (= 5.0.7.2)
- actionmailer (= 5.0.7.2)
- actionpack (= 5.0.7.2)
- actionview (= 5.0.7.2)
- activejob (= 5.0.7.2)
- activemodel (= 5.0.7.2)
- activerecord (= 5.0.7.2)
- activesupport (= 5.0.7.2)
+ rack-test (1.1.0)
+ rack (>= 1.0, < 3)
+ rails (5.2.4.3)
+ actioncable (= 5.2.4.3)
+ actionmailer (= 5.2.4.3)
+ actionpack (= 5.2.4.3)
+ actionview (= 5.2.4.3)
+ activejob (= 5.2.4.3)
+ activemodel (= 5.2.4.3)
+ activerecord (= 5.2.4.3)
+ activestorage (= 5.2.4.3)
+ activesupport (= 5.2.4.3)
bundler (>= 1.3.0)
- railties (= 5.0.7.2)
+ railties (= 5.2.4.3)
sprockets-rails (>= 2.0.0)
rails-controller-testing (1.0.4)
actionpack (>= 5.0.1.x)
rails-dom-testing (2.0.3)
activesupport (>= 4.2.0)
nokogiri (>= 1.6)
- rails-html-sanitizer (1.0.4)
- loofah (~> 2.2, >= 2.2.2)
+ rails-html-sanitizer (1.3.0)
+ loofah (~> 2.3)
rails-perftest (0.0.7)
- railties (5.0.7.2)
- actionpack (= 5.0.7.2)
- activesupport (= 5.0.7.2)
+ railties (5.2.4.3)
+ actionpack (= 5.2.4.3)
+ activesupport (= 5.2.4.3)
method_source
rake (>= 0.8.7)
- thor (>= 0.18.1, < 2.0)
+ thor (>= 0.19.0, < 2.0)
rake (13.0.1)
raphael-rails (2.1.2)
rb-fsevent (0.10.3)
therubyracer (0.12.3)
libv8 (~> 3.16.14.15)
ref
- thor (0.20.3)
+ thor (1.0.1)
thread_safe (0.3.6)
tilt (2.0.9)
- tzinfo (1.2.6)
+ tzinfo (1.2.7)
thread_safe (~> 0.1)
uglifier (2.7.2)
execjs (>= 0.3.0)
json (>= 1.8.0)
- websocket-driver (0.6.5)
+ websocket-driver (0.7.3)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5)
xpath (2.1.0)
andand
angularjs-rails (~> 1.3.8)
arvados!
+ bootsnap
bootstrap-sass (~> 3.4.1)
bootstrap-tab-history-rails
bootstrap-x-editable-rails
headless (~> 1.0.2)
httpclient (~> 2.5)
jquery-rails
+ launchy (~> 2.4.0)
less
less-rails
lograge
piwik_analytics
poltergeist (~> 1.5.1)
rack-mini-profiler
- rails (~> 5.0.0)
+ rails (~> 5.2.0)
rails-controller-testing
rails-perftest
raphael-rails
signet (< 0.12)
simplecov (~> 0.7)
simplecov-rcov
+ sprockets (~> 3.0)
sshkey
themes_for_rails!
therubyracer
uglifier (~> 2.0)
BUNDLED WITH
- 1.16.6
+ 1.17.3
begin
rescue_from(ActiveRecord::RecordNotFound,
ActionController::RoutingError,
- ActionController::UnknownController,
AbstractController::ActionNotFound,
with: :render_not_found)
rescue_from(Exception,
+++ /dev/null
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-class ApplicationRecord < ActiveRecord::Base
- self.abstract_class = true
-end
\ No newline at end of file
end
end
+ # The ActiveModel::Dirty API was changed on Rails 5.2
+ # See: https://github.com/rails/rails/commit/c3675f50d2e59b7fc173d7b332860c4b1a24a726#diff-aaddd42c7feb0834b1b5c66af69814d3
+ def mutations_from_database
+ @mutations_from_database ||= ActiveModel::NullMutationTracker.instance
+ end
+
def self.columns
@discovered_columns = [] if !defined?(@discovered_columns)
return @discovered_columns if @discovered_columns.andand.any?
#
# SPDX-License-Identifier: AGPL-3.0
-ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
+ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../Gemfile', __dir__)
load Gem.bin_path('bundler', 'bundle')
#
# SPDX-License-Identifier: AGPL-3.0
-require 'pathname'
require 'fileutils'
include FileUtils
# path to your application root.
-APP_ROOT = Pathname.new File.expand_path('../../', __FILE__)
+APP_ROOT = File.expand_path('..', __dir__)
def system!(*args)
system(*args) || abort("\n== Command #{args} failed ==")
system! 'gem install bundler --conservative'
system('bundle check') || system!('bundle install')
+ # Install JavaScript dependencies if using Yarn
+ # system('bin/yarn')
+
# puts "\n== Copying sample files =="
# unless File.exist?('config/database.yml')
# cp 'config/database.yml.sample', 'config/database.yml'
system! 'gem install bundler --conservative'
system('bundle check') || system!('bundle install')
+ # Install JavaScript dependencies if using Yarn
+ # system('bin/yarn')
+
puts "\n== Updating database =="
system! 'bin/rails db:migrate'
--- /dev/null
+#!/usr/bin/env ruby
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+APP_ROOT = File.expand_path('..', __dir__)
+Dir.chdir(APP_ROOT) do
+ begin
+ exec "yarnpkg #{ARGV.join(" ")}"
+ rescue Errno::ENOENT
+ $stderr.puts "Yarn executable was not detected in the system."
+ $stderr.puts "Download Yarn at https://yarnpkg.com/en/docs/install"
+ exit 1
+ end
+end
action_mailer.delivery_method: :test
active_support.deprecation: :stderr
profiling_enabled: true
- secret_token: <%= rand(2**256).to_s(36) %>
secret_key_base: <%= rand(2**256).to_s(36) %>
site_name: Workbench:test
#
# SPDX-License-Identifier: AGPL-3.0
-require File.expand_path('../boot', __FILE__)
+require_relative 'boot'
require "rails"
# Pick only the frameworks we need:
require "active_model/railtie"
require "active_job/railtie"
require "active_record/railtie"
+# Skip ActiveStorage (new in Rails 5.1)
+# require "active_storage/engine"
require "action_controller/railtie"
require "action_mailer/railtie"
require "action_view/railtie"
require_relative "arvados_config.rb"
+ # Initialize configuration defaults for originally generated Rails version.
+ config.load_defaults 5.1
+
# Settings in config/environments/* take precedence over those specified here.
# Application configuration should go into files in config/initializers
# -- all .rb files in that directory are automatically loaded.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE'])
+require 'bootsnap/setup' # Speed up boot time by caching expensive operations.
# Use ARVADOS_API_TOKEN environment variable (if set) in console
require 'rails'
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+
+# Define an application-wide content security policy
+# For further information see the following documentation
+# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy
+
+# Rails.application.config.content_security_policy do |policy|
+# policy.default_src :self, :https
+# policy.font_src :self, :https, :data
+# policy.img_src :self, :https, :data
+# policy.object_src :none
+# policy.script_src :self, :https
+# policy.style_src :self, :https
+
+# # Specify URI for violation reports
+# # policy.report_uri "/csp-violation-report-endpoint"
+# end
+
+# If you are using UJS then enable automatic nonce generation
+# Rails.application.config.content_security_policy_nonce_generator = -> request { SecureRandom.base64(16) }
+
+# Report CSP violations to a specified URI
+# For further information see the following documentation:
+# https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy-Report-Only
+# Rails.application.config.content_security_policy_report_only = true
# Require `belongs_to` associations by default. Previous versions had false.
Rails.application.config.active_record.belongs_to_required_by_default = false
-
-# Do not halt callback chains when a callback returns false. Previous versions had true.
-ActiveSupport.halt_callback_chains_on_return_false = true
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+#
+# This file contains migration options to ease your Rails 5.1 upgrade.
+#
+# Once upgraded flip defaults one by one to migrate to the new default.
+#
+# Read the Guide for Upgrading Ruby on Rails for more info on each option.
+
+# Make `form_with` generate non-remote forms.
+Rails.application.config.action_view.form_with_generates_remote_forms = false
+
+# Unknown asset fallback will return the path passed in when the given
+# asset is not present in the asset pipeline.
+# Rails.application.config.assets.unknown_asset_fallback = false
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Be sure to restart your server when you modify this file.
+#
+# This file contains migration options to ease your Rails 5.2 upgrade.
+#
+# Once upgraded flip defaults one by one to migrate to the new default.
+#
+# Read the Guide for Upgrading Ruby on Rails for more info on each option.
+
+# Make Active Record use stable #cache_key alongside new #cache_version method.
+# This is needed for recyclable cache keys.
+# Rails.application.config.active_record.cache_versioning = true
+
+# Use AES-256-GCM authenticated encryption for encrypted cookies.
+# Also, embed cookie expiry in signed or encrypted cookies for increased security.
+#
+# This option is not backwards compatible with earlier Rails versions.
+# It's best enabled when your entire app is migrated and stable on 5.2.
+#
+# Existing cookies will be converted on read then written with the new scheme.
+# Rails.application.config.action_dispatch.use_authenticated_cookie_encryption = true
+
+# Use AES-256-GCM authenticated encryption as default cipher for encrypting messages
+# instead of AES-256-CBC, when use_authenticated_message_encryption is set to true.
+# Rails.application.config.active_support.use_authenticated_message_encryption = true
+
+# Add default protection from forgery to ActionController::Base instead of in
+# ApplicationController.
+# Rails.application.config.action_controller.default_protect_from_forgery = true
+
+# Store boolean values in sqlite3 databases as 1 and 0 instead of 't' and
+# 'f' after migrating old data.
+# Rails.application.config.active_record.sqlite3.represent_boolean_as_integer = true
+
+# Use SHA-1 instead of MD5 to generate non-sensitive digests, such as the ETag header.
+# Rails.application.config.active_support.use_sha1_digests = true
+
+# Make `form_with` generate id attributes for any generated HTML tags.
+# Rails.application.config.action_view.form_with_generates_ids = true
#
# SPDX-License-Identifier: AGPL-3.0
-ArvadosWorkbench::Application.routes.draw do
+Rails.application.routes.draw do
themes_for_rails
resources :keep_disks
# no regular words or you'll be exposed to dictionary attacks.
# You can use `rails secret` to generate a secure secret key.
-# Make sure the secrets in this file are kept private
-# if you're sharing your code publicly.
+# NOTE that these get overridden by Arvados' own configuration system.
-development:
- secret_key_base: 33e2d171ec6c67cf8e9a9fbfadc1071328bdab761297e2fe28b9db7613dd542c1ba3bdb3bd3e636d1d6f74ab73a2d90c4e9c0ecc14fde8ccd153045f94e9cc41
+# development:
+# secret_key_base: <%= rand(1<<255).to_s(36) %>
-test:
- secret_key_base: d4c07cab3530fccf5d86565ecdc359eb2a853b8ede3b06edb2885e4423d7a726f50a3e415bb940fd4861e8fec16459665fd377acc8cdd98ea63294d2e0d12bb2
+# test:
+# secret_key_base: <%= rand(1<<255).to_s(36) %>
-# Do not keep production secrets in the repository,
-# instead read values from the environment.
+# In case this doesn't get overridden for some reason, assign a random key
+# to gracefully degrade by rejecting cookies instead of by opening a
+# vulnerability.
production:
- secret_key_base: <%= ENV["SECRET_KEY_BASE"] %>
+ secret_key_base: <%= rand(1<<255).to_s(36) %>
FORCE=-f
fi
-#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig
-
if ! [[ -z "$version_tag" ]]; then
docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
-else
- docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:latest
-fi
+ ECODE=$?
-ECODE=$?
+ if [[ "$ECODE" != "0" ]]; then
+ EXITCODE=$(($EXITCODE + $ECODE))
+ fi
-if [[ "$ECODE" != "0" ]]; then
- EXITCODE=$(($EXITCODE + $ECODE))
+ checkexit $ECODE "docker tag"
+ title "docker tag complete (`timer`)"
fi
-checkexit $ECODE "docker tag"
-title "docker tag complete (`timer`)"
-
title "uploading images"
timer_reset
-if [[ "$ECODE" != "0" ]]; then
+if [[ "$EXITCODE" != "0" ]]; then
title "upload arvados images SKIPPED because build or tag failed"
else
if [[ $upload == true ]]; then
docker_push arvados/jobs:"$version_tag"
else
docker_push arvados/jobs:$cwl_runner_version_orig
- docker_push arvados/jobs:latest
fi
title "upload arvados images finished (`timer`)"
else
clear_temp() {
if [[ -z "$temp" ]]; then
- # we didn't even get as far as making a temp dir
+ # we did not even get as far as making a temp dir
:
elif [[ -z "$temp_preserve" ]]; then
+ # Go creates readonly dirs in the module cache, which cause
+ # "rm -rf" to fail unless we chmod first.
+ chmod -R u+w "$temp"
rm -rf "$temp"
else
echo "Leaving behind temp dirs in $temp"
( [[ -n "$WORKSPACE" ]] && [[ -d "$WORKSPACE/services" ]] ) \
|| fatal "WORKSPACE environment variable not set to a source directory (see: $0 --help)"
[[ -z "$CONFIGSRC" ]] || [[ -s "$CONFIGSRC/config.yml" ]] \
- || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
+ || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
echo Checking dependencies:
echo "locale: ${LANG}"
[[ "$(locale charmap)" = "UTF-8" ]] \
fi
if [[ $NEED_SDK_R == false ]]; then
- echo "R SDK not needed, it will not be installed."
+ echo "R SDK not needed, it will not be installed."
fi
checkpidfile() {
. "$VENVDIR/bin/activate"
echo 'Starting API, controller, keepproxy, keep-web, arv-git-httpd, ws, and nginx ssl proxy...'
if [[ ! -d "$WORKSPACE/services/api/log" ]]; then
- mkdir -p "$WORKSPACE/services/api/log"
+ mkdir -p "$WORKSPACE/services/api/log"
fi
# Remove empty api.pid file if it exists
if [[ -f "$WORKSPACE/tmp/api.pid" && ! -s "$WORKSPACE/tmp/api.pid" ]]; then
- rm -f "$WORKSPACE/tmp/api.pid"
+ rm -f "$WORKSPACE/tmp/api.pid"
fi
all_services_stopped=
fail=1
tmpdir_gem_home="$(env - PATH="$PATH" HOME="$GEMHOME" gem env gempath | cut -f1 -d:)"
PATH="$tmpdir_gem_home/bin:$PATH"
- export GEM_PATH="$tmpdir_gem_home"
+ export GEM_PATH="$tmpdir_gem_home:$(gem env gempath)"
echo "Will install dependencies to $(gem env gemdir)"
- echo "Will install arvados gems to $tmpdir_gem_home"
+ echo "Will install bundler and arvados gems to $tmpdir_gem_home"
echo "Gem search path is GEM_PATH=$GEM_PATH"
- bundle="$(gem env gempath | cut -f1 -d:)/bin/bundle"
+ bundle="$tmpdir_gem_home/bin/bundle"
(
export HOME=$GEMHOME
bundlers="$(gem list --details bundler)"
check_arvados_config() {
if [[ "$1" = "env" ]] ; then
- return
+ return
fi
if [[ -z "$ARVADOS_CONFIG" ]] ; then
- # Create config file. The run_test_server script requires PyYAML,
- # so virtualenv needs to be active. Downstream steps like
- # workbench install which require a valid config.yml.
- if [[ ! -s "$VENVDIR/bin/activate" ]] ; then
- install_env
- fi
- . "$VENVDIR/bin/activate"
+ # Create config file. The run_test_server script requires PyYAML,
+ # so virtualenv needs to be active. Downstream steps like
+ # workbench install which require a valid config.yml.
+ if [[ ! -s "$VENVDIR/bin/activate" ]] ; then
+ install_env
+ fi
+ . "$VENVDIR/bin/activate"
cd "$WORKSPACE"
- eval $(python sdk/python/tests/run_test_server.py setup_config)
- deactivate
+ eval $(python sdk/python/tests/run_test_server.py setup_config)
+ deactivate
fi
}
(
set -e
cd "$WORKSPACE/doc"
- ARVADOS_API_HOST=qr1hi.arvadosapi.com
+ ARVADOS_API_HOST=pirca.arvadosapi.com
# Make sure python-epydoc is installed or the next line won't
# do much good!
PYTHONPATH=$WORKSPACE/sdk/python/ "$bundle" exec rake linkchecker baseurl=file://$WORKSPACE/doc/.site/ arvados_workbench_host=https://workbench.$ARVADOS_API_HOST arvados_api_host=$ARVADOS_API_HOST
- Run a workflow using Workbench:
- user/getting_started/workbench.html.textile.liquid
- user/tutorials/tutorial-workflow-workbench.html.textile.liquid
- - user/composer/composer.html.textile.liquid
+ - Working at the Command Line:
+ - user/getting_started/setup-cli.html.textile.liquid
+ - user/reference/api-tokens.html.textile.liquid
+ - user/getting_started/check-environment.html.textile.liquid
- Access an Arvados virtual machine:
- user/getting_started/vm-login-with-webshell.html.textile.liquid
- user/getting_started/ssh-access-unix.html.textile.liquid
- user/getting_started/ssh-access-windows.html.textile.liquid
- - user/getting_started/check-environment.html.textile.liquid
- - user/reference/api-tokens.html.textile.liquid
- Working with data sets:
- user/tutorials/tutorial-keep.html.textile.liquid
- user/tutorials/tutorial-keep-get.html.textile.liquid
- user/tutorials/tutorial-keep-mount-gnu-linux.html.textile.liquid
- user/tutorials/tutorial-keep-mount-os-x.html.textile.liquid
- user/tutorials/tutorial-keep-mount-windows.html.textile.liquid
- - user/topics/keep.html.textile.liquid
- user/tutorials/tutorial-keep-collection-lifecycle.html.textile.liquid
- user/topics/arv-copy.html.textile.liquid
- - user/topics/storage-classes.html.textile.liquid
- user/topics/collection-versioning.html.textile.liquid
- - Working with git repositories:
- - user/tutorials/add-new-repository.html.textile.liquid
- - user/tutorials/git-arvados-guide.html.textile.liquid
- - Running workflows at the command line:
+ - user/topics/storage-classes.html.textile.liquid
+ - user/topics/keep.html.textile.liquid
+ - Data Analysis with Workflows:
- user/cwl/cwl-runner.html.textile.liquid
- user/cwl/cwl-run-options.html.textile.liquid
- - Develop an Arvados workflow:
- - user/tutorials/intro-crunch.html.textile.liquid
- user/tutorials/writing-cwl-workflow.html.textile.liquid
+ - user/topics/arv-docker.html.textile.liquid
- user/cwl/cwl-style.html.textile.liquid
- - user/cwl/federated-workflows.html.textile.liquid
- user/cwl/cwl-extensions.html.textile.liquid
+ - user/cwl/federated-workflows.html.textile.liquid
- user/cwl/cwl-versions.html.textile.liquid
- - user/topics/arv-docker.html.textile.liquid
+ - Working with git repositories:
+ - user/tutorials/add-new-repository.html.textile.liquid
+ - user/tutorials/git-arvados-guide.html.textile.liquid
- Reference:
- user/topics/link-accounts.html.textile.liquid
- user/reference/cookbook.html.textile.liquid
- sdk/python/example.html.textile.liquid
- sdk/python/python.html.textile.liquid
- sdk/python/arvados-fuse.html.textile.liquid
- - sdk/python/events.html.textile.liquid
+ - sdk/python/arvados-cwl-runner.html.textile.liquid
- sdk/python/cookbook.html.textile.liquid
+ - sdk/python/events.html.textile.liquid
- CLI:
- sdk/cli/install.html.textile.liquid
- sdk/cli/index.html.textile.liquid
- admin/migrating-providers.html.textile.liquid
- user/topics/arvados-sync-groups.html.textile.liquid
- admin/scoped-tokens.html.textile.liquid
+ - admin/token-expiration-policy.html.textile.liquid
- Monitoring:
- admin/logging.html.textile.liquid
- admin/metrics.html.textile.liquid
+++ /dev/null
-#!/usr/bin/env python
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-# Import the Arvados sdk module
-import arvados
-
-# Get information about the task from the environment
-this_task = arvados.current_task()
-
-this_task_input = arvados.current_job()['script_parameters']['input']
-
-# Create the object access to the collection referred to in the input
-collection = arvados.CollectionReader(this_task_input)
-
-# Create an object to write a new collection as output
-out = arvados.CollectionWriter()
-
-# Create a new file in the output collection
-with out.open('0-filter.txt') as out_file:
- # Iterate over every input file in the input collection
- for input_file in collection.all_files():
- # Output every line in the file that starts with '0'
- out_file.writelines(line for line in input_file if line.startswith('0'))
-
-# Commit the output to Keep.
-output_locator = out.finish()
-
-# Use the resulting locator as the output for this task.
-this_task.set_output(output_locator)
-
-# Done!
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-<div class="alert alert-block alert-info">
- <button type="button" class="close" data-dismiss="alert">×</button>
- <h4>Hi!</h4>
- <P>This section is incomplete. Please be patient with us as we fill in the blanks — or <A href="https://dev.arvados.org/projects/arvados/wiki/Documentation#Contributing">contribute to the documentation project.</A></P>
-</div>
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-<div class="alert alert-block alert-info">
- <button type="button" class="close" data-dismiss="alert">×</button>
- <h4>Hi!</h4>
- <p>This section is incomplete. Please be patient with us as we fill in the blanks — or <A href="https://dev.arvados.org/projects/arvados/wiki/Documentation#Contributing">contribute to the documentation project.</A></p>
-</div>
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'notebox_begin' %}
-As stated above, arv-copy is recursive by default and requires a working git repository in the destination cluster. If you do not have a repository created, you can follow the "Adding a new repository":{{site.baseurl}}/user/tutorials/add-new-repository.html page. We will use the *tutorial* repository created in that page as the example.
-
-<br/>In addition, arv-copy requires git when copying to a git repository. Please make sure that git is installed and available.
-
-{% include 'notebox_end' %}
+++ /dev/null
-#!/usr/bin/env python
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-import hashlib
-import os
-import arvados
-
-# Jobs consist of one or more tasks. A task is a single invocation of
-# a crunch script.
-
-# Get the current task
-this_task = arvados.current_task()
-
-# Tasks have a sequence number for ordering. All tasks
-# with the current sequence number must finish successfully
-# before tasks in the next sequence are started.
-# The first task has sequence number 0
-if this_task['sequence'] == 0:
- # Get the "input" field from "script_parameters" on the task object
- job_input = arvados.current_job()['script_parameters']['input']
-
- # Create a collection reader to read the input
- cr = arvados.CollectionReader(job_input)
-
- # Loop over each stream in the collection (a stream is a subset of
- # files that logically represents a directory)
- for s in cr.all_streams():
-
- # Loop over each file in the stream
- for f in s.all_files():
-
- # Synthesize a manifest for just this file
- task_input = f.as_manifest()
-
- # Set attributes for a new task:
- # 'job_uuid' the job that this task is part of
- # 'created_by_job_task_uuid' this task that is creating the new task
- # 'sequence' the sequence number of the new task
- # 'parameters' the parameters to be passed to the new task
- new_task_attrs = {
- 'job_uuid': arvados.current_job()['uuid'],
- 'created_by_job_task_uuid': arvados.current_task()['uuid'],
- 'sequence': 1,
- 'parameters': {
- 'input':task_input
- }
- }
-
- # Ask the Arvados API server to create a new task, running the same
- # script as the parent task specified in 'created_by_job_task_uuid'
- arvados.api().job_tasks().create(body=new_task_attrs).execute()
-
- # Now tell the Arvados API server that this task executed successfully,
- # even though it doesn't have any output.
- this_task.set_output(None)
-else:
- # The task sequence was not 0, so it must be a parallel worker task
- # created by the first task
-
- # Instead of getting "input" from the "script_parameters" field of
- # the job object, we get it from the "parameters" field of the
- # task object
- this_task_input = this_task['parameters']['input']
-
- collection = arvados.CollectionReader(this_task_input)
-
- # There should only be one file in the collection, so get the
- # first one from the all files iterator.
- input_file = next(collection.all_files())
- output_path = os.path.normpath(os.path.join(input_file.stream_name(),
- input_file.name))
-
- # Everything after this is the same as the first tutorial.
- digestor = hashlib.new('md5')
- for buf in input_file.readall():
- digestor.update(buf)
-
- out = arvados.CollectionWriter()
- with out.open('md5sum.txt') as out_file:
- out_file.write("{} {}\n".format(digestor.hexdigest(), output_path))
-
- this_task.set_output(out.finish())
-
-# Done!
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'notebox_begin_warning' %}
-This section assumes the legacy Jobs API is available. Some newer installations have already disabled the Jobs API in favor of the Containers API.
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{
- "name": "Example using R in a custom Docker image",
- "components": {
- "Rscript": {
- "script": "run-command",
- "script_version": "master",
- "repository": "arvados",
- "script_parameters": {
- "command": [
- "Rscript",
- "$(glob $(file $(myscript))/*.r)",
- "$(glob $(dir $(mydata))/*.csv)"
- ],
- "myscript": {
- "required": true,
- "dataclass": "Collection"
- },
- "mydata": {
- "required": true,
- "dataclass": "Collection"
- }
- },
- "runtime_constraints": {
- "docker_image": "arvados/jobs-with-r"
- }
- }
- }
-}
h2(#cgroups). Configure Linux cgroups accounting
-Linux can report what compute resources are used by processes in a specific cgroup or Docker container. Crunch can use these reports to share that information with users running compute work. This can help pipeline authors debug and optimize their workflows.
+Linux can report what compute resources are used by processes in a specific cgroup or Docker container. Crunch can use these reports to share that information with users running compute work. This can help workflow authors debug and optimize their workflows.
To enable cgroups accounting, you must boot Linux with the command line parameters @cgroup_enable=memory swapaccount=1@.
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'notebox_begin' %}
-The Arvados API and Git servers require Git 1.7.10 or later.
-{% include 'notebox_end' %}
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-Now that all your configuration is in place, rerun the {{railspkg}} package configuration to install necessary Ruby Gems and other server dependencies. On Debian-based systems:
-
-<notextile><pre><code>~$ <span class="userinput">sudo dpkg-reconfigure {{railspkg}}</span>
-</code></pre></notextile>
-
-On Red Hat-based systems:
-
-<notextile><pre><code>~$ <span class="userinput">sudo yum reinstall {{railspkg}}</span>
-</code></pre></notextile>
-
-You only need to do this manual step once, after initial configuration. When you make configuration changes in the future, you just need to restart Nginx for them to take effect.
\ No newline at end of file
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-Ruby 2.3 is recommended; Ruby 2.1 is also known to work.
-
-h4(#rvm). *Option 1: Install with RVM*
-
-<notextile>
-<pre><code><span class="userinput">sudo gpg --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3 7D2BAF1CF37B13E2069D6956105BD0E739499BDB
-\curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.3
-</span></code></pre></notextile>
-
-Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
-
-<notextile>
-<pre><code><span class="userinput">source /usr/local/rvm/scripts/rvm
-</span></code></pre></notextile>
-
-Once RVM is activated in your shell, install Bundler:
-
-<notextile>
-<pre><code>~$ <span class="userinput">gem install bundler</span>
-</code></pre></notextile>
-
-h4(#fromsource). *Option 2: Install from source*
-
-Install prerequisites for Debian 8:
-
-<notextile>
-<pre><code><span class="userinput">sudo apt-get install \
- bison build-essential gettext libcurl3 libcurl3-gnutls \
- libcurl4-openssl-dev libpcre3-dev libreadline-dev \
- libssl-dev libxslt1.1 zlib1g-dev
-</span></code></pre></notextile>
-
-Install prerequisites for CentOS 7:
-
-<notextile>
-<pre><code><span class="userinput">sudo yum install \
- libyaml-devel glibc-headers autoconf gcc-c++ glibc-devel \
- patch readline-devel zlib-devel libffi-devel openssl-devel \
- make automake libtool bison sqlite-devel tar
-</span></code></pre></notextile>
-
-Install prerequisites for Ubuntu 12.04 or 14.04:
-
-<notextile>
-<pre><code><span class="userinput">sudo apt-get install \
- gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
- libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
- libncurses5-dev automake libtool bison pkg-config libffi-dev curl
-</span></code></pre></notextile>
-
-Build and install Ruby:
-
-<notextile>
-<pre><code><span class="userinput">mkdir -p ~/src
-cd ~/src
-curl -f http://cache.ruby-lang.org/pub/ruby/2.3/ruby-2.3.3.tar.gz | tar xz
-cd ruby-2.3.3
-./configure --disable-install-rdoc
-make
-sudo make install
-
-sudo -i gem install bundler</span>
-</code></pre></notextile>
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-On Debian-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install runit</span>
-</code></pre>
-</notextile>
-
-On Red Hat-based systems:
-
-<notextile>
-<pre><code>~$ <span class="userinput">sudo yum install runit</span>
-</code></pre>
-</notextile>
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'notebox_begin_warning' %}
-Arvados pipeline templates are deprecated. The recommended way to develop new workflows for Arvados is using the "Common Workflow Language":{{site.baseurl}}/user/cwl/cwl-runner.html.
-{% include 'notebox_end' %}
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{
- "name":"run-command example pipeline",
- "components":{
- "bwa-mem": {
- "script": "run-command",
- "script_version": "master",
- "repository": "arvados",
- "script_parameters": {
- "command": [
- "bwa",
- "mem",
- "-t",
- "$(node.cores)",
- "$(glob $(dir $(reference_collection))/*.fasta)",
- {
- "foreach": "read_pair",
- "command": "$(read_pair)"
- }
- ],
- "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam",
- "task.foreach": ["sample_subdir", "read_pair"],
- "reference_collection": {
- "required": true,
- "dataclass": "Collection"
- },
- "sample": {
- "required": true,
- "dataclass": "Collection"
- },
- "sample_subdir": "$(dir $(sample))",
- "read_pair": {
- "value": {
- "group": "sample_subdir",
- "regex": "(.*)_[12]\\.fastq(\\.gz)?$"
- }
- }
- }
- }
- }
-}
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{
- "name":"run-command example pipeline",
- "components":{
- "bwa-mem": {
- "script": "run-command",
- "script_version": "master",
- "repository": "arvados",
- "script_parameters": {
- "command": [
- "$(dir $(bwa_collection))/bwa",
- "mem",
- "-t",
- "$(node.cores)",
- "-R",
- "@RG\\\tID:group_id\\\tPL:illumina\\\tSM:sample_id",
- "$(glob $(dir $(reference_collection))/*.fasta)",
- "$(glob $(dir $(sample))/*_1.fastq)",
- "$(glob $(dir $(sample))/*_2.fastq)"
- ],
- "reference_collection": {
- "required": true,
- "dataclass": "Collection"
- },
- "bwa_collection": {
- "required": true,
- "dataclass": "Collection",
- "default": "39c6f22d40001074f4200a72559ae7eb+5745"
- },
- "sample": {
- "required": true,
- "dataclass": "Collection"
- },
- "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam"
- }
- }
- }
-}
+++ /dev/null
-#!/usr/bin/env python
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-import arvados
-
-# Automatically parallelize this job by running one task per file.
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
- input_as_path=True)
-
-# Get the input file for the task
-input_file = arvados.get_task_param_mount('input')
-
-# Run the external 'md5sum' program on the input file
-stdoutdata, stderrdata = arvados.util.run_command(['md5sum', input_file])
-
-# Save the standard output (stdoutdata) to "md5sum.txt" in the output collection
-out = arvados.CollectionWriter()
-with out.open('md5sum.txt') as out_file:
- out_file.write(stdoutdata)
-arvados.current_task().set_output(out.finish())
h1(#login). Using SSH to log into an Arvados VM
-To see a list of virtual machines that you have access to and determine the name and login information, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu and click on the menu item *Virtual machines* to go to the Virtual machines page. This page lists the virtual machines you can access. The *Host name* column lists the name of each available VM. The *Login name* column will have a list of comma separated values of the form @you@. In this guide the hostname will be *_shell_* and the login will be *_you_*. Replace these with your hostname and login name as appropriate.
+To see a list of virtual machines that you have access to, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, then click on the menu item *Virtual machines* to go to the Virtual machines page.
+This page lists the virtual machines you can access. The *Host name* column lists the name of each available VM. The *Login name* column lists your login name on that VM. The *Command line* column provides a sample @ssh@ command line.
+At the bottom of the page there may be additional instructions for connecting to your specific Arvados instance. If so, follow your site-specific instructions. If there are no site-specific instructions, you can probably connect directly with @ssh@.
+
+The following are generic instructions. In the examples the login will be *_you_* and the hostname will be *_shell.ClusterID.example.com_*. Replace these with your login name and hostname as appropriate.
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{
- "name": "Tutorial align using bwa mem and SortSam",
- "components": {
- "bwa-mem": {
- "script": "run-command",
- "script_version": "master",
- "repository": "arvados",
- "script_parameters": {
- "command": [
- "$(dir $(bwa_collection))/bwa",
- "mem",
- "-t",
- "$(node.cores)",
- "-R",
- "@RG\\\tID:group_id\\\tPL:illumina\\\tSM:sample_id",
- "$(glob $(dir $(reference_collection))/*.fasta)",
- "$(glob $(dir $(sample))/*_1.fastq)",
- "$(glob $(dir $(sample))/*_2.fastq)"
- ],
- "reference_collection": {
- "required": true,
- "dataclass": "Collection"
- },
- "bwa_collection": {
- "required": true,
- "dataclass": "Collection",
- "default": "39c6f22d40001074f4200a72559ae7eb+5745"
- },
- "sample": {
- "required": true,
- "dataclass": "Collection"
- },
- "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam"
- },
- "runtime_constraints": {
- "docker_image": "bcosc/arv-base-java",
- "arvados_sdk_version": "master"
- }
- },
- "SortSam": {
- "script": "run-command",
- "script_version": "847459b3c257aba65df3e0cbf6777f7148542af2",
- "repository": "arvados",
- "script_parameters": {
- "command": [
- "java",
- "-Xmx4g",
- "-Djava.io.tmpdir=$(tmpdir)",
- "-jar",
- "$(dir $(picard))/SortSam.jar",
- "CREATE_INDEX=True",
- "SORT_ORDER=coordinate",
- "VALIDATION_STRINGENCY=LENIENT",
- "INPUT=$(glob $(dir $(input))/*.sam)",
- "OUTPUT=$(basename $(glob $(dir $(input))/*.sam)).sort.bam"
- ],
- "input": {
- "output_of": "bwa-mem"
- },
- "picard": {
- "required": true,
- "dataclass": "Collection",
- "default": "88447c464574ad7f79e551070043f9a9+1970"
- }
- },
- "runtime_constraints": {
- "docker_image": "bcosc/arv-base-java",
- "arvados_sdk_version": "master"
- }
- }
- }
-}
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{% include 'notebox_begin' %}
-This tutorial assumes you are using the default Arvados instance, @qr1hi@. If you are using a different instance, replace @qr1hi@ with your instance. See "Accessing Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html for more details.
-{% include 'notebox_end' %}
{% endcomment %}
{% include 'notebox_begin' %}
-This tutorial assumes that you are logged into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "FUSE Driver":{{site.baseurl}}/sdk/python/arvados-fuse.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
+This tutorial assumes that you have access to the "Arvados command line tools":/user/getting_started/setup-cli.html and have set the "API token":{{site.baseurl}}/user/reference/api-tokens.html and confirmed a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html .
{% include 'notebox_end' %}
+++ /dev/null
-#!/usr/bin/env python
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-import hashlib # Import the hashlib module to compute MD5.
-import os # Import the os module for basic path manipulation
-import arvados # Import the Arvados sdk module
-
-# Automatically parallelize this job by running one task per file.
-# This means that if the input consists of many files, each file will
-# be processed in parallel on different nodes enabling the job to
-# be completed quicker.
-arvados.job_setup.one_task_per_input_file(if_sequence=0, and_end_task=True,
- input_as_path=True)
-
-# Get object representing the current task
-this_task = arvados.current_task()
-
-# Create the message digest object that will compute the MD5 hash
-digestor = hashlib.new('md5')
-
-# Get the input file for the task
-input_id, input_path = this_task['parameters']['input'].split('/', 1)
-
-# Open the input collection
-input_collection = arvados.CollectionReader(input_id)
-
-# Open the input file for reading
-with input_collection.open(input_path) as input_file:
- for buf in input_file.readall(): # Iterate the file's data blocks
- digestor.update(buf) # Update the MD5 hash object
-
-# Write a new collection as output
-out = arvados.CollectionWriter()
-
-# Write an output file with one line: the MD5 value and input path
-with out.open('md5sum.txt') as out_file:
- out_file.write("{} {}/{}\n".format(digestor.hexdigest(), input_id,
- os.path.normpath(input_path)))
-
-# Commit the output to Keep.
-output_locator = out.finish()
-
-# Use the resulting locator as the output for this task.
-this_task.set_output(output_locator)
-
-# Done!
+#!/usr/bin/env cwl-runner
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-
-{% include 'notebox_end' %}
+cwlVersion: v1.0
+class: CommandLineTool
+inputs: []
+outputs: []
+arguments: ["echo", "hello world!"]
+++ /dev/null
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-{
- "name":"My md5 pipeline",
- "components":{
- "do_hash":{
- "repository":"$USER/$USER",
- "script":"hash.py",
- "script_version":"master",
- "runtime_constraints":{
- "docker_image":"arvados/jobs"
- },
- "script_parameters":{
- "input":{
- "required": true,
- "dataclass": "Collection"
- }
- }
- }
- }
-}
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the primary way to develop and run workflows for Arvados. Arvados supports versions "v1.0":http://commonwl.org/v1.0 and "v1.1":http://commonwl.org/v1.1 of the CWL specification.
+The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the primary way to develop and run workflows for Arvados. Arvados supports versions "v1.0":http://commonwl.org/v1.0 , "v1.1":http://commonwl.org/v1.1 and "v1.2":http://commonwl.org/v1.2 of the CWL standard.
title: Securing API access with scoped tokens
...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
By default, Arvados API tokens grant unlimited access to a user account, and admin account tokens have unlimited access to the whole system. If you want to grant restricted access to a user account, you can create a "scoped token" which is an Arvados API token which is limited to accessing specific APIs.
One use of token scopes is to grant access to data, such as a collection, to users who do not have an Arvados accounts on your cluster. This is done by creating scoped token that only allows getting a specific record. An example of this is "creating a collection sharing link.":{{site.baseurl}}/sdk/python/cookbook.html#sharing_link
--- /dev/null
+---
+layout: default
+navsection: admin
+title: Setting token expiration policy
+...
+
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+When a user logs in to Workbench, they receive a newly created token that grants access to the Arvados API on behalf of that user. By default, this token does not expire until the user explicitly logs off.
+
+Security policies, such as for GxP Compliance, may require that tokens expire by default in order to limit the risk associated with a token being leaked.
+
+The @Login.TokenLifetime@ configuration enables the administrator to set an expiration lifetime for tokens granted through the login flow.
+
+h2. Setting token expiration
+
+Suppose that the organization's security policy requires that user sessions should not be valid for more than 12 hours. In that case, the cluster configuration should be set like the following:
+
+<pre>
+Clusters:
+ zzzzz:
+ ...
+ Login:
+ TokenLifetime: 12h
+ ...
+</pre>
+
+With this configuration, users will have to re-login every 12 hours.
+
+When this configuration is active, the workbench client will also be "untrusted" by default. This means tokens issued to workbench cannot be used to list other tokens issued to the user, and cannot be used to grant new tokens. This stops an attacker from leveraging a leaked token to acquire other tokens.
+
+The default @TokenLifetime@ is zero, which disables this feature.
+
+h2. Applying policy to existing tokens
+
+If you have an existing Arvados installation and want to set a token lifetime policy, there may be user tokens already granted. The administrator can use the following @rake@ tasks to enforce the new policy.
+
+The @db:check_long_lived_tokens@ task will list which users have tokens with no expiration date.
+
+<notextile>
+<pre><code># <span class="userinput">bundle exec rake db:check_long_lived_tokens</span>
+Found 6 long-lived tokens from users:
+user2,user2@example.com,zzzzz-tpzed-5vzt5wc62k46p6r
+admin,admin@example.com,zzzzz-tpzed-6drplgwq9nm5cox
+user1,user1@example.com,zzzzz-tpzed-ftz2tfurbpf7xox
+</code></pre>
+</notextile>
+
+To apply the new policy to existing tokens, use the @db:fix_long_lived_tokens@ task.
+
+<notextile>
+<pre><code># <span class="userinput">bundle exec rake db:fix_long_lived_tokens</span>
+Setting token expiration to: 2020-08-25 03:30:50 +0000
+6 tokens updated.
+</code></pre>
+</notextile>
+
+NOTE: These rake tasks adjust the expiration of all tokens except those belonging to the system root user (@zzzzz-tpzed-000000000000000@). If you have tokens used by automated service accounts that need to be long-lived, you can "create tokens that don't expire using the command line":user-management-cli.html#create-token .
ARVADOS_API_TOKEN=1234567890qwertyuiopasdfghjklzxcvbnm1234567890zzzz
</pre>
-In these examples, @x1u39-tpzed-3kz0nwtjehhl0u4@ is the sample user account. Replace with the uuid of the user you wish to manipulate.
+In these examples, @zzzzz-tpzed-3kz0nwtjehhl0u4@ is the sample user account. Replace with the uuid of the user you wish to manipulate.
See "user management":{{site.baseurl}}/admin/activation.html for an overview of how to use these commands.
This creates a default git repository and VM login. Enables user to self-activate using Workbench.
-<pre>
-arv user setup --uuid x1u39-tpzed-3kz0nwtjehhl0u4
-</pre>
+<notextile>
+<pre><code>$ <span class="userinput">arv user setup --uuid zzzzz-tpzed-3kz0nwtjehhl0u4</span>
+</code></pre>
+</notextile>
+
h3. Deactivate user
-<pre>
-arv user unsetup --uuid x1u39-tpzed-3kz0nwtjehhl0u4
-</pre>
+<notextile>
+<pre><code>$ <span class="userinput">arv user unsetup --uuid zzzzz-tpzed-3kz0nwtjehhl0u4</span>
+</code></pre>
+</notextile>
+
When deactivating a user, you may also want to "reassign ownership of their data":{{site.baseurl}}/admin/reassign-ownership.html .
h3. Directly activate user
-<pre>
-arv user update --uuid "x1u39-tpzed-3kz0nwtjehhl0u4" --user '{"is_active":true}'
-</pre>
+<notextile>
+<pre><code>$ <span class="userinput">arv user update --uuid "zzzzz-tpzed-3kz0nwtjehhl0u4" --user '{"is_active":true}'</span>
+</code></pre>
+</notextile>
+
+Note: this bypasses user agreements checks, and does not set up the user with a default git repository or VM login.
-Note this bypasses user agreements checks, and does not set up the user with a default git repository or VM login.
+h3(#create-token). Create a token for a user
+As an admin, you can create tokens for other users.
+
+<notextile>
+<pre><code>$ <span class="userinput">arv api_client_authorization create --api-client-authorization '{"owner_uuid": "zzzzz-tpzed-fr97h9t4m5jffxs"}'</span>
+{
+ "href":"/api_client_authorizations/zzzzz-gj3su-yyyyyyyyyyyyyyy",
+ "kind":"arvados#apiClientAuthorization",
+ "etag":"9yk144t0v6cvyp0342exoh2vq",
+ "uuid":"zzzzz-gj3su-yyyyyyyyyyyyyyy",
+ "owner_uuid":"zzzzz-tpzed-fr97h9t4m5jffxs",
+ "created_at":"2020-03-12T20:36:12.517375422Z",
+ "modified_by_client_uuid":null,
+ "modified_by_user_uuid":null,
+ "modified_at":null,
+ "user_id":3,
+ "api_client_id":7,
+ "api_token":"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+ "created_by_ip_address":null,
+ "default_owner_uuid":null,
+ "expires_at":null,
+ "last_used_at":null,
+ "last_used_by_ip_address":null,
+ "scopes":["all"]
+}
+</code></pre>
+</notextile>
+
+
+To get the token string, combine the values of @uuid@ and @api_token@ in the form "v2/$uuid/$api_token". In this example the string that goes in @ARVADOS_API_TOKEN@ would be:
+
+<pre>
+ARVADOS_API_TOKEN=v2/zzzzz-gj3su-yyyyyyyyyyyyyyy/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
+</pre>
-h2. Permissions
+h2. Adding Permissions
h3. VM login
Cluster identifiers are mapped API server hosts one of two ways:
-* Through DNS resolution, under the @arvadosapi.com@ domain. For example, the API server for the cluster @qr1hi@ can be found at @qr1hi.arvadosapi.com@. To register a cluster id for free under @arvadosapi.com@, contact "info@curii.com":mailto:info@curii.com
+* Through DNS resolution, under the @arvadosapi.com@ domain. For example, the API server for the cluster @pirca@ can be found at @pirca.arvadosapi.com@. To register a cluster id for free under @arvadosapi.com@, contact "info@curii.com":mailto:info@curii.com
* Through explicit configuration:
The @RemoteClusters@ section of @/etc/arvados/config.yml@ (for arvados-controller)
+++ /dev/null
-{
- "name":"GATK / exome PE fastq to snp",
- "components":{
- "extract-reference":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"file-select",
- "script_parameters":{
- "names":[
- "human_g1k_v37.fasta.gz",
- "human_g1k_v37.fasta.fai.gz",
- "human_g1k_v37.dict.gz"
- ],
- "input":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi"
- },
- "output_name":false
- },
- "bwa-index":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"bwa-index",
- "script_parameters":{
- "input":{
- "output_of":"extract-reference"
- },
- "bwa_tbz":{
- "value":"8b6e2c4916133e1d859c9e812861ce13+70",
- "required":true
- }
- },
- "output_name":false
- },
- "bwa-aln":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"bwa-aln",
- "script_parameters":{
- "input":{
- "dataclass":"Collection",
- "required":"true"
- },
- "reference_index":{
- "output_of":"bwa-index"
- },
- "samtools_tgz":{
- "value":"c777e23cf13e5d5906abfdc08d84bfdb+74",
- "required":true
- },
- "bwa_tbz":{
- "value":"8b6e2c4916133e1d859c9e812861ce13+70",
- "required":true
- }
- },
- "runtime_constraints":{
- "max_tasks_per_node":1
- },
- "output_name":false
- },
- "picard-gatk2-prep":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"picard-gatk2-prep",
- "script_parameters":{
- "input":{
- "output_of":"bwa-aln"
- },
- "reference":{
- "output_of":"extract-reference"
- },
- "picard_zip":{
- "value":"687f74675c6a0e925dec619cc2bec25f+77",
- "required":true
- }
- },
- "runtime_constraints":{
- "max_tasks_per_node":1
- },
- "output_name":false
- },
- "GATK2-realign":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"GATK2-realign",
- "script_parameters":{
- "input":{
- "output_of":"picard-gatk2-prep"
- },
- "gatk_bundle":{
- "value":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi",
- "required":true
- },
- "picard_zip":{
- "value":"687f74675c6a0e925dec619cc2bec25f+77",
- "required":true
- },
- "gatk_tbz":{
- "value":"7e0a277d6d2353678a11f56bab3b13f2+87",
- "required":true
- },
- "regions":{
- "value":"13b53dbe1ec032dfc495fd974aa5dd4a+87/S02972011_Covered_sort_merged.bed"
- },
- "region_padding":{
- "value":10
- }
- },
- "runtime_constraints":{
- "max_tasks_per_node":2
- },
- "output_name":false
- },
- "GATK2-bqsr":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"GATK2-bqsr",
- "script_parameters":{
- "input":{
- "output_of":"GATK2-realign"
- },
- "gatk_bundle":{
- "value":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi",
- "required":true
- },
- "picard_zip":{
- "value":"687f74675c6a0e925dec619cc2bec25f+77",
- "required":true
- },
- "gatk_tbz":{
- "value":"7e0a277d6d2353678a11f56bab3b13f2+87",
- "required":true
- }
- },
- "output_name":false
- },
- "GATK2-merge-call":{
- "repository":"arvados",
- "script_version":"e820bd1c6890f93ea1a84ffd5730bbf0e3d8e153",
- "script":"GATK2-merge-call",
- "script_parameters":{
- "input":{
- "output_of":"GATK2-bqsr"
- },
- "gatk_bundle":{
- "value":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi",
- "required":true
- },
- "picard_zip":{
- "value":"687f74675c6a0e925dec619cc2bec25f+77",
- "required":true
- },
- "gatk_tbz":{
- "value":"7e0a277d6d2353678a11f56bab3b13f2+87",
- "required":true
- },
- "regions":{
- "value":"13b53dbe1ec032dfc495fd974aa5dd4a+87/S02972011_Covered_sort_merged.bed"
- },
- "region_padding":{
- "value":10
- },
- "GATK2_UnifiedGenotyper_args":{
- "default":[
- "-stand_call_conf",
- "30.0",
- "-stand_emit_conf",
- "30.0",
- "-dcov",
- "200"
- ]
- }
- },
- "output_name":"Variant calls from UnifiedGenotyper"
- }
- }
-}
+++ /dev/null
-{
- "name":"Real Time Genomics / PE fastq to snp",
- "components":{
- "extract_reference":{
- "script":"file-select",
- "script_parameters":{
- "names":[
- "human_g1k_v37.fasta.gz"
- ],
- "input":"d237a90bae3870b3b033aea1e99de4a9+10820+K@qr1hi"
- },
- "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2"
- },
- "reformat_reference":{
- "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2",
- "script":"rtg-fasta2sdf",
- "script_parameters":{
- "input":{
- "output_of":"extract_reference"
- },
- "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi",
- "rtg_license":{
- "optional":false
- }
- }
- },
- "reformat_reads":{
- "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2",
- "script":"rtg-fastq2sdf",
- "script_parameters":{
- "input":{
- "optional":false
- },
- "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi",
- "rtg_license":{
- "optional":false
- }
- }
- },
- "map_reads":{
- "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2",
- "script":"rtg-map",
- "script_parameters":{
- "input":{
- "output_of":"reformat_reads"
- },
- "reference":{
- "output_of":"reformat_reference"
- },
- "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi",
- "rtg_license":{
- "optional":false
- }
- },
- "runtime_constraints":{
- "max_tasks_per_node":1
- }
- },
- "report_snp":{
- "script_version":"4c1f8cd1431ece2ef11c130d48bb2edfd2f00ec2",
- "script":"rtg-snp",
- "script_parameters":{
- "input":{
- "output_of":"map_reads"
- },
- "reference":{
- "output_of":"reformat_reference"
- },
- "rtg_binary_zip":"5d33618193f763b7dc3a3fdfa11d452e+95+K@qr1hi",
- "rtg_license":{
- "optional":false
- }
- }
- }
- }
-}
+++ /dev/null
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: CC-BY-SA-3.0
-
-abort 'Error: Ruby >= 1.9.3 required.' if RUBY_VERSION < '1.9.3'
-
-require 'arvados'
-
-arv = Arvados.new(api_version: 'v1')
-arv.node.list[:items].each do |node|
- if node[:crunch_worker_state] != 'down'
- ping_age = (Time.now - Time.parse(node[:last_ping_at])).to_i rescue -1
- puts "#{node[:uuid]} #{node[:crunch_worker_state]} #{ping_age}"
- end
-end
$ git clone https://github.com/arvados/arvados.git
$ cd arvados/tools/arvbox/bin
$ ./arvbox start localdemo
+$ ./arvbox adduser demouser demo@example.com
</pre>
+You can now log in as @demouser@ using the password you selected.
+
h2. Requirements
* Linux 3.x+ and Docker 1.9+
build <config> build arvbox Docker image
reboot <config> stop, build arvbox Docker image, run
rebuild <config> build arvbox Docker image, no layer cache
+checkpoint create database backup
+restore restore checkpoint
+hotreset reset database and restart API without restarting container
reset delete arvbox arvados data (be careful!)
destroy delete all arvbox code and data (be careful!)
log <service> tail log of specified service
sv <start|stop|restart> <service>
change state of service inside arvbox
clone <from> <to> clone dev arvbox
+adduser <username> <email>
+ add a user login
+removeuser <username>
+ remove user login
+listusers list user logins
</pre>
h2. Install root certificate
h3. dev
-Development configuration. Boots a complete Arvados environment inside the container. The "arvados", "arvado-dev" and "sso-devise-omniauth-provider" code directories along data directories "postgres", "var", "passenger" and "gems" are bind mounted from the host file system for easy access and persistence across container rebuilds. Services are bound to the Docker container's network IP address and can only be accessed on the local host.
+Development configuration. Boots a complete Arvados environment inside the container. The "arvados" and "arvados-dev" code directories along data directories "postgres", "var", "passenger" and "gems" are bind mounted from the host file system for easy access and persistence across container rebuilds. Services are bound to the Docker container's network IP address and can only be accessed on the local host.
-In "dev" mode, you can override the default autogenerated settings of Rails projects by adding "application.yml.override" to any Rails project (sso, api, workbench). This can be used to test out API server settings or point Workbench at an alternate API server.
+In "dev" mode, you can override the default autogenerated settings of Rails projects by adding "application.yml.override" to any Rails project (api, workbench). This can be used to test out API server settings or point Workbench at an alternate API server.
h3. localdemo
The root directory of the Arvados-dev source tree
default: $ARVBOX_DATA/arvados-dev
-h3. SSO_ROOT
-
-The root directory of the SSO source tree
-default: $ARVBOX_DATA/sso-devise-omniauth-provider
-
h3. ARVBOX_PUBLISH_IP
The IP address on which to publish services when running in public configuration. Overrides default detection of the host's IP address.
+++ /dev/null
----
-layout: default
-navsection: installguide
-title: Copy pipeline from the Arvados Playground
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-This tutorial describes how to find and copy a publicly shared pipeline from the Arvados Playground. Please note that you can use similar steps to copy any template you can access from the Arvados Playground to your cluster.
-
-h3. Access a public pipeline in the Arvados Playground using Workbench
-
-the Arvados Playground provides access to some public data, which can be used to experience Arvados in action. Let's access a public pipeline and copy it to your cluster, so that you can run it in your environment.
-
-Start by visiting the "*Arvados Playground public projects page*":https://playground.arvados.org/projects/public. This page lists all the publicly accessible projects in this arvados installation. Click on one of these projects to open it. We will use "*lobSTR v.3 (Public)*":https://playground.arvados.org/projects/qr1hi-j7d0g-up6qgpqz5ie2vfq as the example in this tutorial.
-
-Once in the "*lobSTR v.3 (Public)*":https://playground.arvados.org/projects/qr1hi-j7d0g-up6qgpqz5ie2vfq project, click on the *Pipeline templates* tab. In the pipeline templates tab, you will see a template named *lobSTR v.3*. Click on the <span class="fa fa-lg fa-gears"></span> *Show* button to the left of this name. This will take to you to the "*lobSTR v.3*":https://playground.arvados.org/pipeline_templates/qr1hi-p5p6p-9pkaxt6qjnkxhhu template page.
-
-Once in this page, you can take the *uuid* of this template from the address bar, which is *qr1hi-p5p6p-9pkaxt6qjnkxhhu*. Next, we will copy this template to your Arvados instance.
-
-h3. Copying a pipeline template from the Arvados Playground to your cluster
-
-As described above, navigate to the publicly shared pipeline template "*lobSTR v.3*":https://playground.arvados.org/pipeline_templates/qr1hi-p5p6p-9pkaxt6qjnkxhhu on the Arvados Playground. We will now copy this template with uuid *qr1hi-p5p6p-9pkaxt6qjnkxhhu* to your cluster.
-
-{% include 'tutorial_expectations' %}
-
-We will use the Arvados *arv-copy* command to copy this template to your cluster. In order to use arv-copy, first you need to setup the source and destination cluster configuration files. Here, *qr1hi* would be the source cluster and your Arvados instance would be the *dst_cluster*.
-
-During this setup, if you have an account in the Arvados Playground, you can use "your access token":#using-your-token to create the source configuration file. If you do not have an account in the Arvados Playground, you can use the "anonymous access token":#using-anonymous-token for the source cluster configuration.
-
-h4(#using-anonymous-token). *Configuring source and destination setup files using anonymous access token*
-
-Configure the source and destination clusters as described in the "*Using arv-copy*":http://doc.arvados.org/user/topics/arv-copy.html tutorial in user guide, while using *5vqmz9mik2ou2k9objb8pnyce8t97p6vocyaouqo3qalvpmjs5* as the API token for source configuration.
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd ~/.config/arvados</span>
-~$ <span class="userinput">echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf</span>
-~$ <span class="userinput">echo "ARVADOS_API_TOKEN=5vqmz9mik2ou2k9objb8pnyce8t97p6vocyaouqo3qalvpmjs5" >> qr1hi.conf</span>
-</code></pre>
-</notextile>
-
-You can now copy the pipeline template from *qr1hi* to *your cluster*. Replace *dst_cluster* with the *ClusterID* of your cluster.
-
-<notextile>
-<pre><code>~$ <span class="userinput"> arv-copy --no-recursive --src qr1hi --dst dst_cluster qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
-</code></pre>
-</notextile>
-
-*Note:* When you are using anonymous access token to copy the template, you will not be able to do a recursive copy since you will not be able to provide the dst-git-repo parameter. In order to perform a recursive copy of the template, you would need to use the Arvados API token from your account as explained in the "using your token":#using-your-token section below.
-
-h4(#using-your-token). *Configuring source and destination setup files using personal access token*
-
-If you already have an account in the Arvados Playground, you can follow the instructions in the "*Using arv-copy*":http://doc.arvados.org/user/topics/arv-copy.html user guide to get your *Current token* for source and destination clusters, and use them to create the source *qr1hi.conf* and dst_cluster.conf configuration files.
-
-You can now copy the pipeline template from *qr1hi* to *your cluster* with or without recursion. Replace *dst_cluster* with the *ClusterID* of your cluster.
-
-*Non-recursive copy:*
-<notextile>
-<pre><code>~$ <span class="userinput"> arv-copy --no-recursive --src qr1hi --dst dst_cluster qr1hi-p5p6p-9pkaxt6qjnkxhhu</span></code></pre>
-</notextile>
-
-*Recursive copy:*
-<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu</span></code></pre>
-</notextile>
Azure secrets file which will be sourced from this script
--azure-resource-group (default: false, required if building for Azure)
Azure resource group
- --azure-storage-account (default: false, required if building for Azure)
- Azure storage account
--azure-location (default: false, required if building for Azure)
Azure location, e.g. centralus, eastus, westeurope
--azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
<notextile><pre><code>~$ <span class="userinput">./build.sh --json-file arvados-images-azure.json \
--arvados-cluster-id ClusterID \
--azure-resource-group ResourceGroup \
- --azure-storage-account StorageAccount \
--azure-location AzureRegion \
--azure-sku AzureSKU \
--azure-secrets-file AzureSecretsFilePath \
</span>
</code></pre></notextile>
-For @ClusterID@, fill in your cluster ID. The @ResourceGroup@, @StorageAccount@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
+For @ClusterID@, fill in your cluster ID. The @ResourceGroup@ and @AzureRegion@ (e.g. 'eastus2') should be configured for where you want the compute image to be generated and stored. The @AzureSKU@ is the SKU of the base image to be used, e.g. '18.04-LTS' for Ubuntu 18.04.
@AzureSecretsFilePath@ should be replaced with the path to a shell script that loads the Azure secrets with sufficient permissions to create the image. The file would look like this:
h4. Minimal configuration example for Azure
+Using managed disks:
+
+<notextile>
+<pre><code> Containers:
+ CloudVMs:
+ ImageID: "zzzzz-compute-v1597349873"
+ Driver: azure
+ DriverParameters:
+ # Credentials.
+ SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+ ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+ ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+ # Data center where VMs will be allocated
+ Location: centralus
+
+ # The resource group where the VM and virtual NIC will be
+ # created.
+ ResourceGroup: zzzzz
+ NetworkResourceGroup: yyyyy # only if different from ResourceGroup
+ Network: xxxxx
+ Subnet: xxxxx-subnet-private
+
+ # The resource group where the disk image is stored, only needs to
+ # be specified if it is different from ResourceGroup
+ ImageResourceGroup: aaaaa
+
+</code></pre>
+</notextile>
+
+Azure recommends using managed images. If you plan to start more than 20 VMs simultaneously, however, it recommends using a shared image gallery instead, to avoid slowdowns and timeouts during VM creation.
+
+Using an image from a shared image gallery:
+
+<notextile>
+<pre><code> Containers:
+ CloudVMs:
+ ImageID: "shared_image_gallery_image_definition_name"
+ Driver: azure
+ DriverParameters:
+ # Credentials.
+ SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+ ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+ ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+ TenantID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+
+ # Data center where VMs will be allocated
+ Location: centralus
+
+ # The resource group where the VM and virtual NIC will be
+ # created.
+ ResourceGroup: zzzzz
+ NetworkResourceGroup: yyyyy # only if different from ResourceGroup
+ Network: xxxxx
+ Subnet: xxxxx-subnet-private
+
+ # The resource group where the disk image is stored, only needs to
+ # be specified if it is different from ResourceGroup
+ ImageResourceGroup: aaaaa
+
+ # (azure) shared image gallery: the name of the gallery
+ SharedImageGalleryName: "shared_image_gallery_1"
+ # (azure) shared image gallery: the version of the image definition
+ SharedImageGalleryImageVersion: "0.0.1"
+
+</code></pre>
+</notextile>
+
+Using unmanaged disks (deprecated):
+
<notextile>
<pre><code> Containers:
CloudVMs:
public static void main(String[] argv) {
ConfigProvider conf = ExternalConfigProvider.builder().
apiProtocol("https").
- apiHost("qr1hi.arvadosapi.com").
+ apiHost("zzzzz.arvadosapi.com").
apiPort(443).
apiToken("...").
build();
--- /dev/null
+---
+layout: default
+navsection: sdk
+navmenu: Python
+title: Arvados CWL Runner
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+The Arvados CWL Runner is a Python utility that allows you to submit and run Common Workflow Language (CWL) workflows on an Arvados cluster. It requires the Python SDK installed in order to access Arvados services.
+
+h2. Installation
+
+If you are logged in to a managed Arvados VM, the @arvados-cwl-runner@ utility should already be installed.
+
+To use the CWL runner elsewhere, you can install from a distribution package, or PyPI.
+
+h2. Option 1: Install from distribution packages
+
+First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/packages.html
+
+{% assign arvados_component = 'python3-arvados-cwl-runner' %}
+
+{% include 'install_packages' %}
+
+h2. Option 2: Install with pip
+
+Run @pip install arvados-cwl-runner@ in an appropriate installation environment, such as a virtualenv.
+
+Note:
+
+The SDK uses @pycurl@ which depends on the @libcurl@ C library. To build the module you may have to first install additional packages. On Debian 9 this is:
+
+<pre>
+$ apt-get install git build-essential python-dev libcurl4-openssl-dev libssl1.0-dev python-llfuse
+</pre>
+
+For Python 3 this is:
+
+<pre>
+$ apt-get install git build-essential python3-dev libcurl4-openssl-dev libssl1.0-dev python3-llfuse
+</pre>
+
+h3. Check Docker access
+
+In order to pull and upload Docker images, @arvados-cwl-runner@ requires access to Docker. You do not need Docker if the Docker images you intend to use are already available in Arvados.
+
+You can determine if you have access to Docker by running @docker version@:
+
+<notextile>
+<pre><code>~$ <span class="userinput">docker version</span>
+Client:
+ Version: 1.9.1
+ API version: 1.21
+ Go version: go1.4.2
+ Git commit: a34a1d5
+ Built: Fri Nov 20 12:59:02 UTC 2015
+ OS/Arch: linux/amd64
+
+Server:
+ Version: 1.9.1
+ API version: 1.21
+ Go version: go1.4.2
+ Git commit: a34a1d5
+ Built: Fri Nov 20 12:59:02 UTC 2015
+ OS/Arch: linux/amd64
+</code></pre>
+</notextile>
+
+If this returns an error, contact the sysadmin of your cluster for assistance.
+
+h3. Usage
+
+Please refer to the "Writing a CWL workflow":{{site.baseurl}}/user/cwl/ section of the user guide for more information on running CWL workflows with @arvados-cwl-runner@.
{% codeblock as python %}
import arvados
api = arvados.api()
-container_request_uuid="qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+container_request_uuid="zzzzz-xvhdp-zzzzzzzzzzzzzzz"
container_request = api.container_requests().get(uuid=container_request_uuid).execute()
print(container_request["mounts"]["/var/lib/cwl/cwl.input.json"])
{% endcodeblock %}
import arvados
import arvados.collection
api = arvados.api()
-container_request_uuid="qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+container_request_uuid="zzzzz-xvhdp-zzzzzzzzzzzzzzz"
container_request = api.container_requests().get(uuid=container_request_uuid).execute()
collection = arvados.collection.CollectionReader(container_request["output_uuid"])
print(collection.open("cwl.output.json").read())
elif c['runtime_status'].get('warning', None):
return 'Warning'
return c['state']
-container_request_uuid = 'qr1hi-xvhdp-zzzzzzzzzzzzzzz'
+container_request_uuid = 'zzzzz-xvhdp-zzzzzzzzzzzzzzz'
print(get_cr_state(container_request_uuid))
{% endcodeblock %}
{% codeblock as python %}
import arvados
api = arvados.api()
-parent_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
namefilter = "bwa%" # the "like" filter uses SQL pattern match syntax
container_request = api.container_requests().get(uuid=parent_request_uuid).execute()
parent_container_uuid = container_request["container_uuid"]
{% codeblock as python %}
import arvados
api = arvados.api()
-parent_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
namefilter = "bwa%" # the "like" filter uses SQL pattern match syntax
container_request = api.container_requests().get(uuid=parent_request_uuid).execute()
parent_container_uuid = container_request["container_uuid"]
{% codeblock as python %}
import arvados
api = arvados.api()
-parent_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+parent_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
container_request = api.container_requests().get(uuid=parent_request_uuid).execute()
parent_container_uuid = container_request["container_uuid"]
child_requests = api.container_requests().list(filters=[
import arvados
import arvados.collection
api = arvados.api()
-container_request_uuid = "qr1hi-xvhdp-zzzzzzzzzzzzzzz"
+container_request_uuid = "zzzzz-xvhdp-zzzzzzzzzzzzzzz"
container_request = api.container_requests().get(uuid=container_request_uuid).execute()
collection = arvados.collection.CollectionReader(container_request["log_uuid"])
for c in collection:
import arvados
api = arvados.api()
download="https://your.download.server"
-collection_uuid="qr1hi-4zz18-zzzzzzzzzzzzzzz"
+collection_uuid="zzzzz-4zz18-zzzzzzzzzzzzzzz"
token = api.api_client_authorizations().create(body={"api_client_authorization":{"scopes": [
"GET /arvados/v1/collections/%s" % collection_uuid,
"GET /arvados/v1/collections/%s/" % collection_uuid,
import arvados
import arvados.collection
api = arvados.api()
-project_uuid = "qr1hi-tpzed-zzzzzzzzzzzzzzz"
-collection_uuids = ["qr1hi-4zz18-aaaaaaaaaaaaaaa", "qr1hi-4zz18-bbbbbbbbbbbbbbb"]
+project_uuid = "zzzzz-tpzed-zzzzzzzzzzzzzzz"
+collection_uuids = ["zzzzz-4zz18-aaaaaaaaaaaaaaa", "zzzzz-4zz18-bbbbbbbbbbbbbbb"]
combined_manifest = ""
for u in collection_uuids:
c = api.collections().get(uuid=u).execute()
import arvados
import arvados.collection
-project_uuid = "qr1hi-j7d0g-zzzzzzzzzzzzzzz"
+project_uuid = "zzzzz-j7d0g-zzzzzzzzzzzzzzz"
collection_name = "My collection"
filename = "file1.txt"
import arvados
import arvados.collection
-collection_uuid = "qr1hi-4zz18-zzzzzzzzzzzzzzz"
+collection_uuid = "zzzzz-4zz18-zzzzzzzzzzzzzzz"
filename = "file1.txt"
api = arvados.api()
layout: default
navsection: sdk
navmenu: Python
-title: Subscribing to events
+title: Subscribing to database events
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
puts "UUID of first repo returned is #{first_repo[:uuid]}"</code>
{% endcodeblock %}
-UUID of first repo returned is qr1hi-s0uqq-b1bnybpx3u5temz
+UUID of first repo returned is zzzzz-s0uqq-b1bnybpx3u5temz
h2. update
+++ /dev/null
----
-layout: default
-navsection: start
-title: Run your first pipeline in minutes
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-h2. LobSTR v3
-
-In this quickstart guide, we'll run an existing pipeline with pre-existing data. Step-by-step instructions are shown below. You can follow along using your own local install or by using the <a href="https://playground.arvados.org/">Arvados Playground</a> (any Google account can be used to log in).
-
-(For more information about this pipeline, see our <a href="https://dev.arvados.org/projects/arvados/wiki/LobSTR_tutorial">detailed lobSTR guide</a>).
-
-<div id="carousel-firstpipe" class="carousel slide" data-interval="false">
- <!-- Indicators -->
- <ol class="carousel-indicators">
- <li data-target="#carousel-firstpipe" data-slide-to="0" class="active"></li>
- <li data-target="#carousel-firstpipe" data-slide-to="1"></li>
- <li data-target="#carousel-firstpipe" data-slide-to="2"></li>
- <li data-target="#carousel-firstpipe" data-slide-to="3"></li>
- <li data-target="#carousel-firstpipe" data-slide-to="4"></li>
- <li data-target="#carousel-firstpipe" data-slide-to="5"></li>
- <li data-target="#carousel-firstpipe" data-slide-to="6"></li>
- </ol>
-
- <!-- Wrapper for slides -->
- <div class="carousel-inner" role="listbox">
- <div class="item active">
- <img src="{{ site.baseurl }}/images/quickstart/1.png" alt="Step 1. At the dashboard, click 'Run a pipeline...'.">
- <div class="carousel-caption">
- Step 1. At the dashboard, click 'Run a pipeline...'.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/quickstart/2.png" alt="Choose 'lobstr v.3' and hit 'Next'.">
- <div class="carousel-caption">
- Choose 'lobstr v.3' and hit 'Next'.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/quickstart/3.png" alt="Rename the pipeline instance, then click 'Run'. Click 'Choose' to change the default inputs.">
- <div class="carousel-caption">
- Rename the pipeline instance, then click 'Run'. Click 'Choose' to change the default inputs.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/quickstart/4.png" alt="Here we search for and choose new inputs.">
- <div class="carousel-caption">
- Here we search for and choose new inputs.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/quickstart/5.png" alt="After the job completes, you can re-run it with one click.">
- <div class="carousel-caption">
- After the job completes, you can re-run it with one click.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/quickstart/6.png" alt="You can inspect details about the pipeline which are automatically logged.">
- <div class="carousel-caption">
- You can inspect automatically-logged details about the pipeline.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/quickstart/7.png" alt="Click 'Create sharing link' to share the output files with people outside Arvados. [END]">
- <div class="carousel-caption">
- Click 'Create sharing link' to share the output files with people outside Arvados. [END]
- </div>
- </div>
-
- </div>
-
- <!-- Controls -->
- <a class="left carousel-control" href="#carousel-firstpipe" role="button" data-slide="prev">
- <span class="glyphicon glyphicon-chevron-left" aria-hidden="true"></span>
- <span class="sr-only">Previous</span>
- </a>
- <a class="right carousel-control" href="#carousel-firstpipe" role="button" data-slide="next">
- <span class="glyphicon glyphicon-chevron-right" aria-hidden="true"></span>
- <span class="sr-only">Next</span>
- </a>
-</div>
-
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
+++ /dev/null
----
-layout: default
-navsection: start
-title: Check out the User Guide
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-Now that you've finished the Getting Started guide, check out the "User Guide":{{site.baseurl}}/user/index.html. The User Guide goes into more depth than the Getting Started guide, covers how to develop your own pipelines in addition to using pre-existing pipelines, covers the Arvados command line tools in addition to the Workbench graphical interface to Arvados, and can be referenced in any order.
+++ /dev/null
----
-layout: default
-navsection: start
-title: Visit an Arvados Public Project
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-h2. <a href="https://workbench.qr1hi.arvadosapi.com/projects/qr1hi-j7d0g-662ij1pcw6bj8uj">Mason Lab - Pathomap / Ancestry Mapper (Public)</a>
-
-You can see Arvados in action by accessing the <a href="https://workbench.qr1hi.arvadosapi.com/projects/qr1hi-j7d0g-662ij1pcw6bj8uj">Mason Lab - Pathomap / Ancestry Mapper (Public) project</a>. By visiting this project, you can see what an Arvados project is, access data collections in this project, and click through a pipeline instance's contents.
-
-You will be accessing this project in read-only mode and will not be able to make any modifications such as running a new pipeline instance.
-
-<div id="carousel-publicproject" class="carousel slide" data-interval="false">
- <!-- Indicators -->
- <ol class="carousel-indicators">
- <li data-target="#carousel-publicproject" data-slide-to="0" class="active"></li>
- <li data-target="#carousel-publicproject" data-slide-to="1"></li>
- <li data-target="#carousel-publicproject" data-slide-to="2"></li>
- <li data-target="#carousel-publicproject" data-slide-to="3"></li>
- <li data-target="#carousel-publicproject" data-slide-to="4"></li>
- <li data-target="#carousel-publicproject" data-slide-to="5"></li>
- <li data-target="#carousel-publicproject" data-slide-to="6"></li>
- <li data-target="#carousel-publicproject" data-slide-to="7"></li>
- <li data-target="#carousel-publicproject" data-slide-to="8"></li>
- <li data-target="#carousel-publicproject" data-slide-to="9"></li>
- <li data-target="#carousel-publicproject" data-slide-to="10"></li>
- <li data-target="#carousel-publicproject" data-slide-to="11"></li>
- </ol>
-
- <!-- Wrapper for slides -->
- <div class="carousel-inner" role="listbox">
- <div class="item active">
- <img src="{{ site.baseurl }}/images/publicproject/description.png" alt="Step 1. The project's first tab, *Description*, describes what this project is all about.">
- <div class="carousel-caption">
- Step 1. The project's first tab, *Description*, describes what this project is all about.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/collections.png" alt="The *Data collections* tab contains the various pipeline inputs, logs, and outputs.">
- <div class="carousel-caption">
- The *Data collections* tab contains the various pipeline inputs, logs, and outputs.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instances.png" alt="You can see the jobs and pipelines in this project by accessing the *Jobs and pipelines* tab.">
- <div class="carousel-caption">
- You can see the jobs and pipelines in this project by accessing the *Jobs and pipelines* tab.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/collection-show.png" alt="In the *Data collections* tab, click on the *Show* icon to the left of a collection to see the collection contents.">
- <div class="carousel-caption">
- In the *Data collections* tab, click on the *Show* icon to the left of a collection to see the collection contents.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/collection-files.png" alt="The collection page lists the details about it. The *Files* tab can be used to view and download individual files in it.">
- <div class="carousel-caption">
- The collection page lists the details about it. The *Files* tab can be used to view and download individual files in it.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/collection-graph.png" alt="The collection *Provenance graph* tab gives a visual representation of this collection's provenance.">
- <div class="carousel-caption">
- The collection *Provenance graph* tab gives a visual representation of this collection's provenance.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instance-show.png" alt="In the project *Jobs and pipelines* tab, click on the *Show* icon to the left of a pipeline to access the pipeline contents.">
- <div class="carousel-caption">
- In the project *Jobs and pipelines* tab, click on the *Show* icon to the left of a pipeline to access the pipeline contents.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instance-components.png" alt="The pipeline *Components* tab details the various jobs in it and how long it took to run it.">
- <div class="carousel-caption">
- The pipeline *Components* tab details the various jobs in it and how long it took to run it.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instance-job.png" alt="Click on the down arrow in one of the job rows to see the job details. You can also click on the job's output.">
- <div class="carousel-caption">
- Click on the down arrow <i class="fa fa-lg fa-fw fa-caret-down"></i> in one of the job rows to see the job details. You can also click on the job's output.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instance-log.png" alt="The *Log* tab can be used to see the log for the pipeline instance.">
- <div class="carousel-caption">
- The *Log* tab can be used to see the log for the pipeline instance.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instance-graph.png" alt="The *Graph* tab provides a visual representation of the pipeline run.">
- <div class="carousel-caption">
- The *Graph* tab provides a visual representation of the pipeline run.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/publicproject/instance-advanced.png" alt="The *Advanced* tab can be used to access metadata about the pipeline. [END]">
- <div class="carousel-caption">
- The *Advanced* tab can be used to access metadata about the pipeline. [END]
- </div>
- </div>
- </div>
-
- <!-- Controls -->
- <a class="left carousel-control" href="#carousel-publicproject" role="button" data-slide="prev">
- <span class="glyphicon glyphicon-chevron-left" aria-hidden="true"></span>
- <span class="sr-only">Previous</span>
- </a>
- <a class="right carousel-control" href="#carousel-publicproject" role="button" data-slide="next">
- <span class="glyphicon glyphicon-chevron-right" aria-hidden="true"></span>
- <span class="sr-only">Next</span>
- </a>
-</div>
-
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
+++ /dev/null
----
-layout: default
-navsection: start
-title: Sharing Data
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-You can easily share data entirely through Workbench, the web interface to Arvados.
-
-h2. Upload and share your existing data
-
-Step-by-step instructions are shown below.
-
-<div id="carousel-sharedata" class="carousel slide" data-interval="false">
- <!-- Indicators -->
- <ol class="carousel-indicators">
- <li data-target="#carousel-sharedata" data-slide-to="0" class="active"></li>
- <li data-target="#carousel-sharedata" data-slide-to="1"></li>
- <li data-target="#carousel-sharedata" data-slide-to="2"></li>
- <li data-target="#carousel-sharedata" data-slide-to="3"></li>
- <li data-target="#carousel-sharedata" data-slide-to="4"></li>
- <li data-target="#carousel-sharedata" data-slide-to="5"></li>
- <li data-target="#carousel-sharedata" data-slide-to="6"></li>
- <li data-target="#carousel-sharedata" data-slide-to="7"></li>
- </ol>
-
- <!-- Wrapper for slides -->
- <div class="carousel-inner" role="listbox">
- <div class="item active">
- <img src="{{ site.baseurl }}/images/uses/gotohome.png" alt="Step 1. From the dashboard, go to your Home project.">
- <div class="carousel-caption">
- Step 1. From the dashboard, go to your Home project.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/uploaddata.png" alt="Click 'Add data' → 'Upload files'.">
- <div class="carousel-caption">
- Click 'Add data' → 'Upload files'.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/choosefiles.png" alt="A new collection is created automatically. Choose files to upload and hit Start.">
- <div class="carousel-caption">
- A new collection is created automatically. Choose files to upload and hit Start.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/uploading.png" alt="Files will upload and stay uploaded even if the browser is closed.">
- <div class="carousel-caption">
- Files will upload and stay uploaded even if the browser is closed.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/rename.png" alt="Rename the collection appropriately.">
- <div class="carousel-caption">
- Rename the collection appropriately.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/sharing.png" alt="Click 'Create sharing link'. You can click 'unshare' at any later point.">
- <div class="carousel-caption">
- Click 'Create sharing link'. You can click 'Unshare' at any later point.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/shared.png" alt="Now just share this link with anyone you want.">
- <div class="carousel-caption">
- Now just share this link with anyone you want.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/uses/sharedsubdirs.png" alt="Here's a more complex collection. [END]">
- <div class="carousel-caption">
- Here's a more complex collection. [END]
- </div>
- </div>
-
- </div>
-
- <!-- Controls -->
- <a class="left carousel-control" href="#carousel-sharedata" role="button" data-slide="prev">
- <span class="glyphicon glyphicon-chevron-left" aria-hidden="true"></span>
- <span class="sr-only">Previous</span>
- </a>
- <a class="right carousel-control" href="#carousel-sharedata" role="button" data-slide="next">
- <span class="glyphicon glyphicon-chevron-right" aria-hidden="true"></span>
- <span class="sr-only">Next</span>
- </a>
-</div>
-
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
+++ /dev/null
----
-layout: default
-navsection: start
-title: Welcome to Arvados!
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-This guide provides an introduction to using Arvados to solve big data bioinformatics problems.
-
-h2. What is Arvados?
-
-Arvados is a free and open source bioinformatics platform for genomic and biomedical data.
-
-We address the needs of IT directors, lab principals, and bioinformaticians.
-
-h2. Why use Arvados?
-
-Arvados enables you to quickly begin using cloud computing resources in your bioinformatics work. It allows you to track your methods and datasets, share them securely, and easily re-run analyses.
-
-h3. Take a look (Screenshots gallery)
-
-<div id="carousel-keyfeatures" class="carousel slide" data-interval="false">
- <!-- Indicators -->
- <ol class="carousel-indicators">
- <li data-target="#carousel-keyfeatures" data-slide-to="0" class="active"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="1"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="2"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="3"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="4"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="5"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="6"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="7"></li>
- <li data-target="#carousel-keyfeatures" data-slide-to="8"></li>
- </ol>
-
- <!-- Wrapper for slides -->
- <div class="carousel-inner" role="listbox">
- <div class="item active">
- <img src="{{ site.baseurl }}/images/keyfeatures/dashboard2.png" alt="[START] After logging in, you will see Workbench's dashboard.">
- <div class="carousel-caption">
- [START] After logging in, you will see Workbench's dashboard.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/running2.png" alt="Pipelines describe a set of computational tasks (jobs).">
- <div class="carousel-caption">
- Pipelines describe a set of computational tasks (jobs).
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/log.png" alt="The output of all jobs is logged and stored automatically.">
- <div class="carousel-caption">
- The output of all jobs is logged and stored automatically.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/graph.png" alt="Pipelines can also be viewed in auto-generated graph form.">
- <div class="carousel-caption">
- Pipelines can also be viewed in auto-generated graph form.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/rerun.png" alt="Pipelines can easily be re-run exactly as before, or...">
- <div class="carousel-caption">
- Pipelines can easily be re-run exactly as before, or...
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/chooseinputs.png" alt="...you can change parameters or pick new datasets.">
- <div class="carousel-caption">
- ...you can change parameters or pick new datasets.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/webupload.png" alt="With web upload, data can be uploaded right in Workbench.">
- <div class="carousel-caption">
- With web upload, data can be uploaded right in Workbench.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/collectionpage.png" alt="Collections allow sharing datasets and job outputs easily. 'Create sharing link' with one click.">
- <div class="carousel-caption">
- Collections allow sharing datasets and job outputs easily. 'Create sharing link' with one click.
- </div>
- </div>
-
- <div class="item">
- <img src="{{ site.baseurl }}/images/keyfeatures/provenance.png" alt="Data provenance is tracked automatically. [END]">
- <div class="carousel-caption">
- Data provenance is tracked automatically. [END]
- </div>
- </div>
-
-
- </div>
-
- <!-- Controls -->
- <a class="left carousel-control" href="#carousel-keyfeatures" role="button" data-slide="prev">
- <span class="glyphicon glyphicon-chevron-left" aria-hidden="true"></span>
- <span class="sr-only">Previous</span>
- </a>
- <a class="right carousel-control" href="#carousel-keyfeatures" role="button" data-slide="next">
- <span class="glyphicon glyphicon-chevron-right" aria-hidden="true"></span>
- <span class="sr-only">Next</span>
- </a>
-</div>
-
-Note: Workbench is the web interface to Arvados.
-Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
-
-h3. Key Features
-
-<ul>
-<li><strong>Track your methods</strong><br/>
-We log every compute job: software versions, machine images, input and output data hashes. Rely on a computer, not your memory and your note-taking skills.<br/><br/></li>
-<li><strong>Share your methods</strong><br/>
-Show other people what you did. Let them use your workflow on their own data. Publish a permalink to your methods and data, so others can reproduce and build on them easily.<br/><br/></li>
-<li><strong>Track data origin</strong><br/>
-Did you really only use fully consented public data in this analysis?<br/><br/></li>
-<li><strong>Get results sooner</strong><br/>
-Run your compute jobs faster by using multi-nodes and multi-cores, even if your programs are single-threaded.<br/><br/></li>
-</ul>
reference:
class: File
location: keep:2463fa9efeb75e099685528b3b9071e0+438/19.fasta.bwt
- arv:collectionUUID: qr1hi-4zz18-pwid4w22a40jp8l
+ arv:collectionUUID: jutro-4zz18-tv416l321i4r01e
read_p1:
class: File
location: keep:ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_1.fastq
- arv:collectionUUID: qr1hi-4zz18-h615rgfmqt3wje0
+ arv:collectionUUID: jutro-4zz18-8k5hsvee0izv2g3
read_p2:
class: File
location: keep:ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_2.fastq
- arv:collectionUUID: qr1hi-4zz18-h615rgfmqt3wje0
+ arv:collectionUUID: jutro-4zz18-8k5hsvee0izv2g3
group_id: arvados_tutorial
sample_id: HWI-ST1027_129
PL: illumina
cwl:tool: bwa-mem.cwl
reference:
class: File
- location: keep:qr1hi-4zz18-pwid4w22a40jp8l/19.fasta.bwt
+ location: keep:jutro-4zz18-tv416l321i4r01e/19.fasta.bwt
read_p1:
class: File
- location: keep:qr1hi-4zz18-h615rgfmqt3wje0/HWI-ST1027_129_D0THKACXX.1_1.fastq
+ location: keep:jutro-4zz18-8k5hsvee0izv2g3/HWI-ST1027_129_D0THKACXX.1_1.fastq
read_p2:
class: File
- location: keep:qr1hi-4zz18-h615rgfmqt3wje0/HWI-ST1027_129_D0THKACXX.1_2.fastq
+ location: keep:jutro-4zz18-8k5hsvee0izv2g3/HWI-ST1027_129_D0THKACXX.1_2.fastq
group_id: arvados_tutorial
sample_id: HWI-ST1027_129
PL: illumina
hints:
DockerRequirement:
- dockerPull: lh3lh3/bwa
+ dockerPull: quay.io/biocontainers/bwa:0.7.17--ha92aebf_3
-baseCommand: [mem]
+baseCommand: [bwa, mem]
arguments:
- {prefix: "-t", valueFrom: $(runtime.cores)}
- - {prefix: "-R", valueFrom: "@RG\tID:$(inputs.group_id)\tPL:$(inputs.PL)\tSM:$(inputs.sample_id)"}
+ - {prefix: "-R", valueFrom: '@RG\\\tID:$(inputs.group_id)\\\tPL:$(inputs.PL)\\\tSM:$(inputs.sample_id)'}
inputs:
reference:
---
layout: default
navsection: userguide
-title: "Using arvados-cwl-runner"
+title: "arvados-cwl-runner options"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --name "Example bwa run" --output-name "Example bwa output" bwa-mem.cwl bwa-mem-input.yml</span>
arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
{
"aligned_sam": {
<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml</span>
arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to qr1hi-4zz18-eqnfwrow8aysa9q
-2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-qr1hi-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to zzzzz-4zz18-eqnfwrow8aysa9q
+2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+zzzzz-8i9sb-fm2n3b1w0l6bskg
</code></pre>
</notextile>
<notextile>
<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml</span>
arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance qr1hi-d1hrv-92wcu6ldtio74r4
-2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Queued
-2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Running
-2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Complete
+2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance zzzzz-d1hrv-92wcu6ldtio74r4
+2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-2nzzfbuf9zjrj4g) is Queued
+2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-2nzzfbuf9zjrj4g) is Running
+2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-2nzzfbuf9zjrj4g) is Complete
2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
{
"aligned_sam": {
---
layout: default
navsection: userguide
-title: "Running an Arvados workflow"
+title: "Starting a Workflow at the Command Line"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
{% include 'tutorial_expectations' %}
-{% include 'notebox_begin' %}
-
-By default, the @arvados-cwl-runner@ is installed on Arvados shell nodes. If you want to submit jobs from somewhere else, such as your workstation, you may install "arvados-cwl-runner.":#setup
-
-{% include 'notebox_end' %}
-
This tutorial will demonstrate how to submit a workflow at the command line using @arvados-cwl-runner@.
-h2. Running arvados-cwl-runner
+# "Get the tutorial files":#get-files
+# "Submitting a workflow to an Arvados cluster":#submitting
+# "Registering a workflow to use in Workbench":#registering
+# "Make a workflow file directly executable":#executable
-h3. Get the example files
+h2(#get-files). Get the tutorial files
-The tutorial files are located in the "documentation section of the Arvados source repository:":https://github.com/arvados/arvados/tree/master/doc/user/cwl/bwa-mem
+The tutorial files are located in the documentation section of the Arvados source repository, which can be found on "git.arvados.org":https://git.arvados.org/arvados.git/tree/HEAD:/doc/user/cwl/bwa-mem or "github":https://github.com/arvados/arvados/tree/master/doc/user/cwl/bwa-mem
<notextile>
-<pre><code>~$ <span class="userinput">git clone https://github.com/arvados/arvados</span>
+<pre><code>~$ <span class="userinput">git clone https://git.arvados.org/arvados.git</span>
~$ <span class="userinput">cd arvados/doc/user/cwl/bwa-mem</span>
</code></pre>
</notextile>
-The tutorial data is hosted on "https://playground.arvados.org":https://playground.arvados.org (also referred to by the identifier *qr1hi*). If you are using a different Arvados instance, you may need to copy the data to your own instance. The easiest way to do this is with "arv-copy":{{site.baseurl}}/user/topics/arv-copy.html (this requires signing up for a free playground.arvados.org account).
+The tutorial data is hosted on "https://playground.arvados.org":https://playground.arvados.org (also referred to by the identifier *pirca*). If you are using a different Arvados instance, you may need to copy the data to your own instance. One way to do this is with "arv-copy":{{site.baseurl}}/user/topics/arv-copy.html (this requires signing up for a free playground.arvados.org account).
<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst settings 2463fa9efeb75e099685528b3b9071e0+438</span>
-~$ <span class="userinput">arv-copy --src qr1hi --dst settings ae480c5099b81e17267b7445e35b4bc7+180</span>
-~$ <span class="userinput">arv-copy --src qr1hi --dst settings 655c6cd07550151b210961ed1d3852cf+57</span>
+<pre><code>~$ <span class="userinput">arv-copy --src pirca --dst settings 2463fa9efeb75e099685528b3b9071e0+438</span>
+~$ <span class="userinput">arv-copy --src pirca --dst settings ae480c5099b81e17267b7445e35b4bc7+180</span>
</code></pre>
</notextile>
If you do not wish to create an account on "https://playground.arvados.org":https://playground.arvados.org, you may download the files anonymously and upload them to your local Arvados instance:
-"https://playground.arvados.org/collections/2463fa9efeb75e099685528b3b9071e0+438":https://playground.arvados.org/collections/2463fa9efeb75e099685528b3b9071e0+438
-
-"https://playground.arvados.org/collections/ae480c5099b81e17267b7445e35b4bc7+180":https://playground.arvados.org/collections/ae480c5099b81e17267b7445e35b4bc7+180
+"https://collections.pirca.arvadosapi.com/c=2463fa9efeb75e099685528b3b9071e0+438/":https://collections.pirca.arvadosapi.com/c=2463fa9efeb75e099685528b3b9071e0+438/
-"https://playground.arvados.org/collections/655c6cd07550151b210961ed1d3852cf+57":https://playground.arvados.org/collections/655c6cd07550151b210961ed1d3852cf+57
+"https://collections.pirca.arvadosapi.com/c=ae480c5099b81e17267b7445e35b4bc7+180/":https://collections.pirca.arvadosapi.com/c=ae480c5099b81e17267b7445e35b4bc7+180/
-h2. Submitting a workflow to an Arvados cluster
+h2(#submitting). Submitting a workflow to an Arvados cluster
h3. Submit a workflow and wait for results
<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner bwa-mem.cwl bwa-mem-input.yml</span>
arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
{
"aligned_sam": {
If you reference a local file which is not in @arv-mount@, then @arvados-cwl-runner@ will upload the file to Keep and use the Keep URI reference from the upload.
-You can also execute CWL files directly from Keep:
+You can also execute CWL files that have been uploaded to Keep:
<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner keep:655c6cd07550151b210961ed1d3852cf+57/bwa-mem.cwl bwa-mem-input.yml</span>
+<pre><code>
+~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arv-put --portable-data-hash --name "bwa-mem.cwl" bwa-mem.cwl</span>
+2020-08-20 13:40:02 arvados.arv_put[12976] INFO: Collection saved as 'bwa-mem.cwl'
+f141fc27e7cfa7f7b6d208df5e0ee01b+59
+~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner keep:f141fc27e7cfa7f7b6d208df5e0ee01b+59/bwa-mem.cwl bwa-mem-input.yml</span>
arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
{
"aligned_sam": {
</code></pre>
</notextile>
+Note: uploading a workflow file to Keep is _not_ the same as registering the workflow for use in Workbench. See "Registering a workflow to use in Workbench":#registering below.
+
h3. Work reuse
Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse. If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run. This also applies to individual workflow steps. For example, a two step workflow where the first step has run before will reuse results for first step and only execute the new second step. You can disable this behavior with @--disable-reuse@.
h3. Command line options
-See "Using arvados-cwl-runner":{{site.baseurl}}/user/cwl/cwl-run-options.html
+See "arvados-cwl-runner options":{{site.baseurl}}/user/cwl/cwl-run-options.html
-h2(#setup). Setting up arvados-cwl-runner
+h2(#registering). Registering a workflow to use in Workbench
-By default, the @arvados-cwl-runner@ is installed on Arvados shell nodes. If you want to submit jobs from somewhere else, such as your workstation, you may install @arvados-cwl-runner@ using @pip@:
+Use @--create-workflow@ to register a CWL workflow with Arvados. This enables you to share workflows with other Arvados users, and to run them by clicking the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button on the Workbench Dashboard, or at the command line by UUID.
<notextile>
-<pre><code>~$ <span class="userinput">virtualenv ~/venv</span>
-~$ <span class="userinput">. ~/venv/bin/activate</span>
-~$ <span class="userinput">pip install -U setuptools</span>
-~$ <span class="userinput">pip install arvados-cwl-runner</span>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to zzzzz-4zz18-7e0hedrmkuyoei3
+2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template zzzzz-p5p6p-rjleou1dwr167v5
+zzzzz-p5p6p-rjleou1dwr167v5
</code></pre>
</notextile>
-h3. Check Docker access
+You can provide a partial input file to set default values for the workflow input parameters. You can also use the @--name@ option to set the name of the workflow:
-In order to pull and upload Docker images, @arvados-cwl-runner@ requires access to Docker. You do not need Docker if the Docker images you intend to use are already available in Arvados.
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --name "My workflow with defaults" --create-workflow bwa-mem.cwl bwa-mem-template.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to zzzzz-4zz18-0f91qkovk4ml18o
+2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template zzzzz-p5p6p-0deqe6nuuyqns2i
+zzzzz-p5p6p-zuniv58hn8d0qd8
+</code></pre>
+</notextile>
-You can determine if you have access to Docker by running @docker version@:
+h3. Running registered workflows at the command line
+
+You can run a registered workflow at the command line by its UUID:
<notextile>
-<pre><code>~$ <span class="userinput">docker version</span>
-Client:
- Version: 1.9.1
- API version: 1.21
- Go version: go1.4.2
- Git commit: a34a1d5
- Built: Fri Nov 20 12:59:02 UTC 2015
- OS/Arch: linux/amd64
-
-Server:
- Version: 1.9.1
- API version: 1.21
- Go version: go1.4.2
- Git commit: a34a1d5
- Built: Fri Nov 20 12:59:02 UTC 2015
- OS/Arch: linux/amd64
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner pirca-7fd4e-3nqbw08vtjl8ybz --help</span>
+INFO /home/peter/work/scripts/venv3/bin/arvados-cwl-runner 2.1.0.dev20200814195416, arvados-python-client 2.1.0.dev20200814195416, cwltool 3.0.20200807132242
+INFO Resolved 'pirca-7fd4e-3nqbw08vtjl8ybz' to 'arvwf:pirca-7fd4e-3nqbw08vtjl8ybz#main'
+usage: pirca-7fd4e-3nqbw08vtjl8ybz [-h] [--PL PL] [--group_id GROUP_ID]
+ [--read_p1 READ_P1] [--read_p2 READ_P2]
+ [--reference REFERENCE]
+ [--sample_id SAMPLE_ID]
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --PL PL
+ --group_id GROUP_ID
+ --read_p1 READ_P1 The reads, in fastq format.
+ --read_p2 READ_P2 For mate paired reads, the second file (optional).
+ --reference REFERENCE
+ The index files produced by `bwa index`
+ --sample_id SAMPLE_ID
</code></pre>
</notextile>
-If this returns an error, contact the sysadmin of your cluster for assistance.
+h2(#executable). Make a workflow file directly executable
+
+You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file:
+
+<notextile>
+<pre><code>#!/usr/bin/env cwl-runner
+</code></pre>
+</notextile>
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+ "aligned_sam": {
+ "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+ "class": "File",
+ "size": 30738986
+ }
+}
+</code></pre>
+</notextile>
+
+You can even make an input file directly executable the same way with the following two lines at the top:
+
+<notextile>
+<pre><code>#!/usr/bin/env cwl-runner
+cwl:tool: <span class="userinput">bwa-mem.cwl</span>
+</code></pre>
+</notextile>
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to zzzzz-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job zzzzz-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (zzzzz-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+ "aligned_sam": {
+ "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+ "class": "File",
+ "size": 30738986
+ }
+}
+</code></pre>
+</notextile>
+
+h2(#setup). Setting up arvados-cwl-runner
+
+See "Arvados CWL Runner":{{site.baseurl}}/sdk/python/arvados-cwl-runner.html
---
layout: default
navsection: userguide
-title: Writing Portable High-Performance Workflows
+title: Guidelines for Writing High-Performance Portable Workflows
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
---
layout: default
navsection: userguide
-title: CWL version and API support
+title: CWL version support
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
+Arvados supports CWL v1.0, v1.1 and v1.2.
+
h2(#v12). Upgrading your workflows to CWL v1.2
If you are starting from a CWL v1.0 document, see "Upgrading your workflows to CWL v1.1":#v11 below.
<notextile>
<pre><code>$ <span class="userinput">arv user current</span>
{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/users/qr1hi-xioed-9z2p3pn12yqdaem",
+ "href":"https://zzzzz.arvadosapi.com/arvados/v1/users/zzzzz-xioed-9z2p3pn12yqdaem",
"kind":"arvados#user",
"etag":"8u0xwb9f3otb2xx9hto4wyo03",
- "uuid":"qr1hi-tpzed-92d3kxnimy3d4e8",
- "owner_uuid":"qr1hi-tpqed-23iddeohxta2r59",
+ "uuid":"zzzzz-tpzed-92d3kxnimy3d4e8",
+ "owner_uuid":"zzzzz-tpqed-23iddeohxta2r59",
"created_at":"2013-12-02T17:05:47Z",
- "modified_by_client_uuid":"qr1hi-xxfg8-owxa2oa2s33jyej",
- "modified_by_user_uuid":"qr1hi-tpqed-23iddeohxta2r59",
+ "modified_by_client_uuid":"zzzzz-xxfg8-owxa2oa2s33jyej",
+ "modified_by_user_uuid":"zzzzz-tpqed-23iddeohxta2r59",
"modified_at":"2013-12-02T17:07:08Z",
"updated_at":"2013-12-05T19:51:08Z",
"email":"you@example.com",
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This document is for accessing an Arvados VM using SSH keys in Unix environments (Linux, OS X, Cygwin). If you would like to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
+This document is for accessing an Arvados VM using SSH keys in Unix-like environments (Linux, macOS, Cygwin, Windows Subsystem for Linux). If you would like to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
{% include 'ssh_intro' %}
Now you can set up @ssh-agent@ (next) or proceed with "adding your key to the Arvados Workbench.":#workbench
-h3. Set up ssh-agent (recommended)
+h3. Set up ssh-agent (optional)
If you find you are entering your passphrase frequently, you can use @ssh-agent@ to manage your credentials. Use @ssh-add -l@ to test if you already have ssh-agent running:
{% include 'ssh_addkey' %}
-h3. Connecting to the virtual machine
+h3. Connecting directly
-Use the following command to connect to the _shell_ VM instance as _you_. Replace *<code>you@shell</code>* at the end of the following command with your *login* and *hostname* from Workbench:
+If the VM is available on the public Internet (or you are on the same private network as the VM) you can connect directly with @ssh@. You can probably copy-and-paste the text from *Command line* column directly into a terminal.
-notextile. <pre><code>$ <span class="userinput">ssh -o "ProxyCommand ssh -p2222 turnout@switchyard.{{ site.arvados_api_host }} -x -a <b>shell</b>" -x <b>you@shell</b></span></code></pre>
+Use the following example command to connect as _you_ to the _shell.ClusterID.example.com_ VM instance. Replace *<code>you@shell.ClusterID.example.com</code>* at the end of the following command with your *login* and *hostname* from Workbench.
+
+notextile. <pre><code>$ <span class="userinput">ssh <b>you@shell.ClusterID.example.com</b></span></code></pre>
+
+h3. Connecting through switchyard
+
+Some Arvados installations use "switchyard" to isolate shell VMs from the public Internet.
+
+Use the following example command to connect to the _shell_ VM instance as _you_. Replace *<code>you@shell</code>* at the end of the following command with your *login* and *hostname* from Workbench:
+
+notextile. <pre><code>$ <span class="userinput">ssh -o "ProxyCommand ssh -p2222 turnout@switchyard.ClusterID.example.com -x -a <b>shell</b>" -x <b>you@shell</b></span></code></pre>
This command does several things at once. You usually cannot log in directly to virtual machines over the public Internet. Instead, you log into a "switchyard" server and then tell the switchyard which virtual machine you want to connect to.
You should now be able to log into the Arvados VM and "check your environment.":check-environment.html
-h3. Configuration (recommended)
+h4. Configuration (recommended)
The command line above is cumbersome, but you can configure SSH to remember many of these settings. Add this text to the file @.ssh/config@ in your home directory (create a new file if @.ssh/config@ doesn't exist):
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This document is for accessing an Arvados VM using SSH keys in Windows environments. If you would like to use to access VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
+This document is for accessing an Arvados VM using SSH keys in Windows environments using PuTTY. If you would like to access the VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix-like environment (Linux, macOS, Cygwin, or Windows Subsystem for Linux), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
{% include 'ssh_intro' %}
h1(#gettingkey). Getting your SSH key
-(Note: if you are using the SSH client that comes with "Cygwin":http://cygwin.com, please use instructions found in the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.)
+(Note: If you are using the SSH client that comes with "Cygwin":http://cygwin.com or Windows Subsystem for Linux (WSL), please use the instructions found in the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.)
We will be using PuTTY to connect to Arvados. "PuTTY":http://www.chiark.greenend.org.uk/~sgtatham/putty/ is a free (MIT-licensed) Win32 Telnet and SSH client. PuTTY includes all the tools a Windows user needs to create private keys and make SSH connections to your virtual machines in the Arvados Cloud.
h3. Initial configuration
+h4. Connecting directly
+
+# Open PuTTY from the Start Menu.
+# On the Session screen set the Host Name (or IP address) to “shell.ClusterID.example.com”, which is the hostname listed in the _Virtual Machines_ page.
+# On the Session screen set the Port to “22”.
+# On the Connection %(rarr)→% Data screen set the Auto-login username to the username listed in the *Login name* column on the Arvados Workbench _Virtual machines_ page.
+# Return to the Session screen. In the Saved Sessions box, enter a name for this configuration and click Save.
+
+h4. Connecting through switchyard
+
# Open PuTTY from the Start Menu.
# On the Session screen set the Host Name (or IP address) to “shell”, which is the hostname listed in the _Virtual Machines_ page.
# On the Session screen set the Port to “22”.
Webshell gives you access to an arvados virtual machine from your browser with no additional setup.
+{% include 'notebox_begin' %}
+Some Arvados clusters may not have webshell set up. If you do not see a "Log in" button or "web shell" column, you will have to follow the "Unix":ssh-access-unix.html or "Windows":ssh-access-windows.html @ssh@ instructions.
+{% include 'notebox_end' %}
+
In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Virtual machines* to see the list of virtual machines you can access. If you do not have access to any virtual machines, please click on <span class="btn btn-sm btn-primary">Send request for shell access</span> or send an email to "support@curoverse.com":mailto:support@curoverse.com.
Each row in the Virtual Machines panel lists the hostname of the VM, along with a <code>Log in as *you*</code> button under the column "Web shell". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-If you are using the default Arvados instance for this guide, you can Access Arvados Workbench using this link:
+{% include 'notebox_begin' %}
+This guide covers the classic Arvados Workbench web application, sometimes referred to as "Workbench 1". There is also a new Workbench web application under development called "Workbench 2". Sites which have both Workbench applications installed will have a dropdown menu option "Switch to Workbench 2" to switch between versions.
+
+This guide will be updated to cover "Workbench 2" in the future.
+{% include 'notebox_end' %}
+
+If you are using the "playground" Arvados instance for this guide, you can Access Arvados Workbench using this link:
<a href="{{site.arvados_workbench_host}}/" target="_blank">{{site.arvados_workbench_host}}/</a>
(If you are using a different Arvados instance than the default for this guide, replace *{{ site.arvados_workbench_host }}* with your private instance in all of the examples in this guide.)
-You may be asked to log in using a Google account. Arvados uses only your name and email address from Google services for identification, and will never access any personal information. If you are accessing Arvados for the first time, the Workbench may indicate your account status is *New / inactive*. If this is the case, contact the administrator of the Arvados instance to request activation of your account.
+You will be asked to log in. Arvados uses only your name and email address for identification, and will never access any personal information. If you are accessing Arvados for the first time, the Workbench may indicate your account status is *New / inactive*. If this is the case, contact the administrator of the Arvados instance to request activation of your account.
-Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance. "You are now ready to run your first pipeline.":{{ site.baseurl }}/user/tutorials/tutorial-workflow-workbench.html
+Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance. You are now ready to "upload data":{{ site.baseurl }}/user/tutorials/tutorial-keep.html or "run your first workflow.":{{ site.baseurl }}/user/tutorials/tutorial-workflow-workbench.html
!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/workbench-dashboard.png!
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This guide provides a reference for using Arvados to solve scientific big data problems, including:
+Arvados is an open source platform for managing, processing, and sharing genomic and other large scientific and biomedical data. This guide provides a reference for using Arvados to solve scientific big data problems, including:
* Robust storage of very large files, such as whole genome sequences, using the "Arvados Keep":{{site.baseurl}}/user/tutorials/tutorial-keep.html content-addressable cluster file system.
* Running compute-intensive scientific analysis pipelines, such as genomic alignment and variant calls using the "Arvados Crunch":{{site.baseurl}}/user/tutorials/intro-crunch.html cluster compute engine.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-{% include 'crunch1only_begin' %}
-On those sites, the "copy a pipeline template" feature described below is not available. However, "copy a workflow" feature is not yet implemented.
-{% include 'crunch1only_end' %}
-
This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@.
{% include 'tutorial_expectations' %}
h2. arv-copy
-@arv-copy@ allows users to copy collections and pipeline templates from one cluster to another. By default, @arv-copy@ will recursively go through a template and copy all dependencies associated with the object.
+@arv-copy@ allows users to copy collections and workflows from one cluster to another. By default, @arv-copy@ will recursively go through the workflow and copy all dependencies associated with the object.
-For example, let's copy from the <a href="https://playground.arvados.org/">Arvados playground</a>, also known as *qr1hi*, to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi.
+For example, let's copy from the <a href="https://playground.arvados.org/">Arvados playground</a>, also known as *pirca*, to *dstcl*. The names *pirca* and *dstcl* are interchangeable with any cluster id. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *zzzzz*-4zz18-tci4vn4fa95w0zx, the cluster name is *zzzzz*.
-In order to communicate with both clusters, you must create custom configuration files for each cluster. In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*. Copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ in both of your clusters. Then, create two configuration files, one for each cluster. The names of the files must have the format of *ClusterID.conf*. In our example, let's make two files, one for *qr1hi* and one for *dst_cluster*. From your *Current token* page in *qr1hi* and *dst_cluster*, copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@.
+In order to communicate with both clusters, you must create custom configuration files for each cluster. In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*. Copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ in both of your clusters. Then, create two configuration files in @~/.config/arvados@, one for each cluster. The names of the files must have the format of *ClusterID.conf*. Navigate to the *Current token* page on each of *pirca* and *dstcl* to get the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@.
!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/api-token-host.png!
-Copy your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ into the config files as shown below in the shell account from which you are executing the commands. For example, the default shell you may have access to is shell.qr1hi. You can add these files in ~/.config/arvados/ in the qr1hi shell terminal.
+The config file consists of two lines, one for ARVADOS_API_HOST and one for ARVADOS_API_TOKEN:
-<notextile>
-<pre><code>~$ <span class="userinput">cd ~/.config/arvados</span>
-~$ <span class="userinput">echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf</span>
-~$ <span class="userinput">echo "ARVADOS_API_TOKEN=123456789abcdefghijkl" >> qr1hi.conf</span>
-~$ <span class="userinput">echo "ARVADOS_API_HOST=dst_cluster.arvadosapi.com" >> dst_cluster.conf</span>
-~$ <span class="userinput">echo "ARVADOS_API_TOKEN=987654321lkjihgfedcba" >> dst_cluster.conf</span>
-</code></pre>
-</notextile>
+<pre>
+ARVADOS_API_HOST=zzzzz.arvadosapi.com
+ARVADOS_API_TOKEN=v2/zzzzz-gj3su-xxxxxxxxxxxxxxx/123456789abcdefghijkl
+</pre>
+
+Copy your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ into the config files as shown below in the shell account from which you are executing the commands. In our example, you need two files, @~/.config/arvados/pirca.conf@ and @~/.config/arvados/dstcl.conf@.
-Now you're ready to copy between *qr1hi* and *dst_cluster*!
+Now you're ready to copy between *pirca* and *dstcl*!
h3. How to copy a collection
-First, select the uuid of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or from the URL bar (the part after @collections/...@)
+First, determine the uuid or portable data hash of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or from the URL bar (the part after @collections/...@)
-Now copy the collection from *qr1hi* to *dst_cluster*. We will use the uuid @qr1hi-4zz18-tci4vn4fa95w0zx@ as an example. You can find this collection in the <a href="https://playground.arvados.org/collections/qr1hi-4zz18-tci4vn4fa95w0zx">lobSTR v.3 project on playground.arvados.org</a>.
+Now copy the collection from *pirca* to *dstcl*. We will use the uuid @jutro-4zz18-tv416l321i4r01e@ as an example. You can find this collection on <a href="https://playground.arvados.org/collections/jutro-4zz18-tv416l321i4r01e">playground.arvados.org</a>.
<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster qr1hi-4zz18-tci4vn4fa95w0zx</span>
-qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0%
-arvados.arv-copy[1234] INFO: Success: created copy with uuid dst_cluster-4zz18-8765943210cdbae
-</code></pre>
-</notextile>
-
-The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in a pre-created project, you can specify the project you want it to be in using the tag @--project-uuid@ followed by the project uuid.
-
-For example, this will copy the collection to project dst_cluster-j7d0g-a894213ukjhal12 in the destination cluster.
-
-<notextile> <pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx</span>
+<pre><code>~$ <span class="userinput">arv-copy --src pirca --dst dstcl jutro-4zz18-tv416l321i4r01e</span>
+jutro-4zz18-tv416l321i4r01e: 6.1M / 6.1M 100.0%
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dstcl-4zz18-xxxxxxxxxxxxxxx
</code></pre>
</notextile>
-h3. How to copy a pipeline template
-
-{% include 'arv_copy_expectations' %}
-
-We will use the uuid @qr1hi-p5p6p-9pkaxt6qjnkxhhu@ as an example pipeline template.
+You can also copy by content address:
<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
-To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
- * [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
-arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-p5p6p-rym2h5ub9m8ofwj
+<pre><code>~$ <span class="userinput">arv-copy --src pirca --dst dstcl 2463fa9efeb75e099685528b3b9071e0+438</span>
+2463fa9efeb75e099685528b3b9071e0+438: 6.1M / 6.1M 100.0%
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dstcl-4zz18-xxxxxxxxxxxxxxx
</code></pre>
</notextile>
-New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@.
-
-By default, if you copy a pipeline template recursively, you will find that the template as well as all the dependencies are in your home project.
+The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in an existing project, you can specify the project you want it to be in using the tag @--project-uuid@ followed by the project uuid.
-If you would like to copy the object without dependencies, you can use the @--no-recursive@ tag.
+For example, this will copy the collection to project dstcl-j7d0g-a894213ukjhal12 in the destination cluster.
-For example, we can copy the same object using this tag.
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
+<notextile> <pre><code>~$ <span class="userinput">arv-copy --src pirca --dst dstcl --project-uuid dstcl-j7d0g-a894213ukjhal12 jutro-4zz18-tv416l321i4r01e</span>
</code></pre>
</notextile>
h3. How to copy a workflow
-We will use the uuid @zzzzz-7fd4e-sampleworkflow1@ as an example workflow.
+We will use the uuid @jutro-7fd4e-mkmmq53m1ze6apx@ as an example workflow.
<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src zzzzz --dst dst_cluster --dst-git-repo $USER/tutorial zzzzz-7fd4e-sampleworkflow1</span>
-zzzzz-4zz18-jidprdejysravcr: 1143M / 1143M 100.0%
-2017-01-04 04:11:58 arvados.arv-copy[5906] INFO:
-2017-01-04 04:11:58 arvados.arv-copy[5906] INFO: Success: created copy with uuid dst_cluster-7fd4e-ojtgpne594ubkt7
+<pre><code>~$ <span class="userinput">arv-copy --src jutro --dst pirca --project-uuid pirca-j7d0g-ecak8knpefz8ere jutro-7fd4e-mkmmq53m1ze6apx</span>
+ae480c5099b81e17267b7445e35b4bc7+180: 23M / 23M 100.0%
+2463fa9efeb75e099685528b3b9071e0+438: 156M / 156M 100.0%
+jutro-4zz18-vvvqlops0a0kpdl: 94M / 94M 100.0%
+2020-08-19 17:04:13 arvados.arv-copy[4789] INFO:
+2020-08-19 17:04:13 arvados.arv-copy[4789] INFO: Success: created copy with uuid pirca-7fd4e-s0tw9rfbkpo2fmx
</code></pre>
</notextile>
-The name, description, and workflow definition from the original workflow will be used for the destination copy. In addition, any *locations* and *docker images* found in the src workflow definition will also be copied to the destination recursively.
+The name, description, and workflow definition from the original workflow will be used for the destination copy. In addition, any *collections* and *docker images* referenced in the source workflow definition will also be copied to the destination.
If you would like to copy the object without dependencies, you can use the @--no-recursive@ flag.
-
-For example, we can copy the same object non-recursively using the following:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv-copy --src zzzzz --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive zzzzz-7fd4e-sampleworkflow1</span>
-</code></pre>
-</notextile>
---
layout: default
navsection: userguide
-title: "Customizing Crunch environment using Docker"
+title: "Working with Docker images"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This page describes how to customize the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a crunch script will be run in using "Docker.":https://www.docker.com/ Docker is a tool for building and running containers that isolate applications from other applications running on the same node. For detailed information about Docker, see the "Docker User Guide.":https://docs.docker.com/userguide/
+This page describes how to set up the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a workflow step will be run in using "Docker.":https://www.docker.com/ Docker is a tool for building and running containers that isolate applications from other applications running on the same node. For detailed information about Docker, see the "Docker User Guide.":https://docs.docker.com/userguide/
-This page will demonstrate how to:
+This page describes:
-# Fetch the arvados/jobs Docker image
-# Manually install additional software into the container
-# Create a new custom image
-# Upload that image to Arvados for use by Crunch jobs
-# Share your image with others
+# "Create a custom image using a Dockerfile":#create
+# "Uploading an image to Arvados":#upload
+# "Sources of pre-built bioinformatics Docker images":#sources
{% include 'tutorial_expectations_workstation' %}
You also need to ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker. You can test this by running @docker version@. If you receive a permission denied error, your user account may need to be added to the @docker@ group. If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin.
-h2. Fetch a starting image
+h2(#create). Create a custom image using a Dockerfile
-The easiest way to begin is to start from the "arvados/jobs" image which already has the Arvados SDK installed along with other configuration required for use with Crunch.
+This example shows how to create a Docker image and add the R package.
-Download the latest "arvados/jobs" image from the Docker registry:
+First, create a new directory called @docker-example-r-base@. In that directory, create a file called @Dockerfile@.
<notextile>
-<pre><code>$ <span class="userinput">docker pull arvados/jobs:latest</span>
-Pulling repository arvados/jobs
-3132168f2acb: Download complete
-a42b7f2c59b6: Download complete
-e5afdf26a7ae: Download complete
-5cae48636278: Download complete
-7a4f91b70558: Download complete
-a04a275c1fd6: Download complete
-c433ff206a22: Download complete
-b2e539b45f96: Download complete
-073b2581c6be: Download complete
-593915af19dc: Download complete
-32260b35005e: Download complete
-6e5b860c1cde: Download complete
-95f0bfb43d4d: Download complete
-c7fd77eedb96: Download complete
-0d7685aafd00: Download complete
+<pre><code>$ <span class="userinput">mkdir docker-example-r-base</span>
+$ <span class="userinput">cd docker-example-r-base</span>
</code></pre>
</notextile>
-h2. Install new packages
-
-Next, enter the container using @docker run@, providing the arvados/jobs image and the program you want to run (in this case the bash shell).
-
<notextile>
-<pre><code>$ <span class="userinput">docker run --interactive --tty --user root arvados/jobs /bin/bash</span>
-root@fbf1d0f529d5:/#
+<pre><code>FROM ubuntu:bionic
+RUN apt-get update && apt-get -yq --no-install-recommends install r-base-core
</code></pre>
</notextile>
-Next, update the package list using @apt-get update@.
+The "RUN" command is executed inside the container and can be any shell command line. You are not limited to installing Debian packages. You may compile programs or libraries from source and install them, edit systemwide configuration files, use other package managers such as @pip@ or @gem@, and perform any other customization necessary to run your program.
-<notextile>
-<pre><code>root@fbf1d0f529d5:/# apt-get update
-Get:2 http://apt.arvados.org stretch-dev InRelease [3260 B]
-Get:1 http://security-cdn.debian.org/debian-security stretch/updates InRelease [94.3 kB]
-Ign:3 http://cdn-fastly.deb.debian.org/debian stretch InRelease
-Get:4 http://cdn-fastly.deb.debian.org/debian stretch-updates InRelease [91.0 kB]
-Get:5 http://apt.arvados.org stretch-dev/main amd64 Packages [208 kB]
-Get:6 http://cdn-fastly.deb.debian.org/debian stretch Release [118 kB]
-Get:7 http://security-cdn.debian.org/debian-security stretch/updates/main amd64 Packages [499 kB]
-Get:8 http://cdn-fastly.deb.debian.org/debian stretch Release.gpg [2434 B]
-Get:9 http://cdn-fastly.deb.debian.org/debian stretch-updates/main amd64 Packages.diff/Index [10.6 kB]
-Get:10 http://cdn-fastly.deb.debian.org/debian stretch-updates/main amd64 Packages 2019-07-08-0821.07.pdiff [445 B]
-Get:10 http://cdn-fastly.deb.debian.org/debian stretch-updates/main amd64 Packages 2019-07-08-0821.07.pdiff [445 B]
-Fetched 1026 kB in 0s (1384 kB/s)
-Reading package lists... Done
-</code></pre>
-</notextile>
+You can also visit the "Docker tutorial":https://docs.docker.com/get-started/part2/ for more information and examples.
+
+You should add your Dockerfiles to the same source control repository as the Workflows that use them.
-In this example, we will install the "R" statistical language Debian package "r-base-core". Use @apt-get install@:
+h3. Create a new image
+
+We're now ready to create a new Docker image. Use @docker build@ to create a new image from the Dockerfile.
<notextile>
-<pre><code>root@fbf1d0f529d5:/# <span class="userinput">apt-get install r-base-core</span>
-Reading package lists... Done
-Building dependency tree
-Reading state information... Done
-The following additional packages will be installed:
-[...]
-done.
+<pre><code>docker-example-r-base$ <span class="userinput">docker build -t docker-example-r-base .</span>
</code></pre>
</notextile>
+h3. Verify image
+
Now we can verify that "R" is installed:
<notextile>
-<pre><code>root@fbf1d0f529d5:/# <span class="userinput">R</span>
+<pre><code>$ <span class="userinput">docker run -ti docker-example-r-base</span>
+root@57ec8f8b2663:/# R
-R version 3.3.3 (2017-03-06) -- "Another Canoe"
-Copyright (C) 2017 The R Foundation for Statistical Computing
+R version 3.4.4 (2018-03-15) -- "Someone to Lean On"
+Copyright (C) 2018 The R Foundation for Statistical Computing
Platform: x86_64-pc-linux-gnu (64-bit)
-
-R is free software and comes with ABSOLUTELY NO WARRANTY.
-You are welcome to redistribute it under certain conditions.
-Type 'license()' or 'licence()' for distribution details.
-
-R is a collaborative project with many contributors.
-Type 'contributors()' for more information and
-'citation()' on how to cite R or R packages in publications.
-
-Type 'demo()' for some demos, 'help()' for on-line help, or
-'help.start()' for an HTML browser interface to help.
-Type 'q()' to quit R.
-
->
</code></pre>
</notextile>
-Note that you are not limited to installing Debian packages. You may compile programs or libraries from source and install them, edit systemwide configuration files, use other package managers such as @pip@ or @gem@, and perform any other customization necessary to run your program.
+h2(#upload). Upload your image
-h2. Create a new image
-
-We're now ready to create a new Docker image. First, quit the container, then use @docker commit@ to create a new image from the stopped container. The container id can be found in the default hostname of the container displayed in the prompt, in this case @fbf1d0f529d5@:
+Finally, we are ready to upload the new Docker image to Arvados. Use @arv-keepdocker@ with the image repository name to upload the image. Without arguments, @arv-keepdocker@ will print out the list of Docker images in Arvados that are available to you.
<notextile>
-<pre><code>root@fbf1d0f529d5:/# <span class="userinput">exit</span>
-$ <span class="userinput">docker commit fbf1d0f529d5 arvados/jobs-with-r</span>
-sha256:2818853ff9f9af5d7f77979803baac9c4710790ad2b84c1a754b02728fdff205
-$ <span class="userinput">docker images</span>
-$ docker images |head
-REPOSITORY TAG IMAGE ID CREATED SIZE
-arvados/jobs-with-r latest 2818853ff9f9 9 seconds ago 703.1 MB
-arvados/jobs latest 12b9f859d48c 4 days ago 362 MB
-</code></pre>
-</notextile>
-
-h2. Upload your image
+<pre><code>$ <span class="userinput">arv-keepdocker docker-example-r-base</span>
+2020-06-29 13:48:19 arvados.arv_put[769] INFO: Creating new cache file at /home/peter/.cache/arvados/arv-put/39ddb51ebf6c5fcb3d713b5969466967
+206M / 206M 100.0% 2020-06-29 13:48:21 arvados.arv_put[769] INFO:
-Finally, we are ready to upload the new Docker image to Arvados. Use @arv-keepdocker@ with the image repository name to upload the image. Without arguments, @arv-keepdocker@ will print out the list of Docker images in Arvados that are available to you.
+2020-06-29 13:48:21 arvados.arv_put[769] INFO: Collection saved as 'Docker image docker-example-r-base:latest sha256:edd10'
+zzzzz-4zz18-0tayximqcyb6uf8
-<notextile>
-<pre><code>$ <span class="userinput">arv-keepdocker arvados/jobs-with-r</span>
-703M / 703M 100.0%
-Collection saved as 'Docker image arvados/jobs-with-r:latest 2818853ff9f9'
-qr1hi-4zz18-abcdefghijklmno
-$ <span class="userinput">arv-keepdocker</span>
+$ <span class="userinput">arv-keepdocker images</span>
REPOSITORY TAG IMAGE ID COLLECTION CREATED
-arvados/jobs-with-r latest 2818853ff9f9 qr1hi-4zz18-abcdefghijklmno Tue Jan 17 20:35:53 2017
+docker-example-r-base latest sha256:edd10 zzzzz-4zz18-0tayximqcyb6uf8 Mon Jun 29 17:46:16 2020
</code></pre>
</notextile>
<pre>
hints:
DockerRequirement:
- dockerPull: arvados/jobs-with-r
+ dockerPull: docker-example-r-base
</pre>
-h2. Share Docker images
+h3. Uploading Docker images to a shared project
-Docker images are subject to normal Arvados permissions. If wish to share your Docker image with others (or wish to share a pipeline template that uses your Docker image) you will need to use @arv-keepdocker@ with the @--project-uuid@ option to upload the image to a shared project.
+Docker images are subject to normal Arvados permissions. If you wish to share your Docker image with others you should use @arv-keepdocker@ with the @--project-uuid@ option to add the image to a shared project and ensure that metadata is set correctly.
<notextile>
-<pre><code>$ <span class="userinput">arv-keepdocker arvados/jobs-with-r --project-uuid qr1hi-j7d0g-xxxxxxxxxxxxxxx</span>
+<pre><code>$ <span class="userinput">arv-keepdocker docker-example-r-base --project-uuid zzzzz-j7d0g-xxxxxxxxxxxxxxx</span>
</code></pre>
</notextile>
+
+h2(#sources). Sources of pre-built images
+
+In addition to creating your own containers, there are a number of resources where you can find bioinformatics tools already wrapped in container images:
+
+"BioContainers":https://biocontainers.pro/
+
+"Dockstore":https://dockstore.org/
+
+"Docker Hub":https://hub.docker.com/
+++ /dev/null
----
-layout: default
-navsection: userguide
-title: "Using arv-web"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-@arv-web@ enables you to run a custom web service from the contents of an Arvados collection.
-
-{% include 'tutorial_expectations_workstation' %}
-
-h2. Usage
-
-@arv-web@ enables you to set up a web service based on the most recent collection in a project. An arv-web application is a reproducible, immutable application bundle where the web app is packaged with both the code to run and the data to serve. Because Arvados Collections can be updated with minimum duplication, it is efficient to produce a new application bundle when the code or data needs to be updated; retaining old application bundles makes it easy to go back and run older versions of your web app.
-
-<pre>
-$ cd $HOME/arvados/services/arv-web
-usage: arv-web.py [-h] --project-uuid PROJECT_UUID [--port PORT]
- [--image IMAGE]
-
-optional arguments:
- -h, --help show this help message and exit
- --project-uuid PROJECT_UUID
- Project uuid to watch
- --port PORT Host port to listen on (default 8080)
- --image IMAGE Docker image to run
-</pre>
-
-At startup, @arv-web@ queries an Arvados project and mounts the most recently modified collection into a temporary directory. It then runs a Docker image with the collection bound to @/mnt@ inside the container. When a new collection is added to the project, or an existing project is updated, it will stop the running Docker container, unmount the old collection, mount the new most recently modified collection, and restart the Docker container with the new mount.
-
-h2. Docker container
-
-The @Dockerfile@ in @arvados/docker/arv-web@ builds a Docker image that runs Apache with @/mnt@ as the DocumentRoot. It is configured to run web applications which use Python WSGI, Ruby Rack, or CGI; to serve static HTML; or browse the contents of the @public@ subdirectory of the collection using default Apache index pages.
-
-To build the Docker image:
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd arvados/docker</span>
-~/arvados/docker$ <span class="userinput">docker build -t arvados/arv-web arv-web</span>
-</code></pre>
-</notextile>
-
-h2. Running sample applications
-
-First, in Arvados Workbench, create a new project. Copy the project UUID from the URL bar (this is the part of the URL after @projects/...@).
-
-Now upload a collection containing a "Python WSGI web app:":http://wsgi.readthedocs.org/en/latest/
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd arvados/services/arv-web</span>
-~/arvados/services/arv-web$ <span class="userinput">arv-put --project [zzzzz-j7d0g-yourprojectuuid] --name sample-wsgi-app sample-wsgi-app</span>
-0M / 0M 100.0%
-Collection saved as 'sample-wsgi-app'
-zzzzz-4zz18-ebohzfbzh82qmqy
-~/arvados/services/arv-web$ <span class="userinput">./arv-web.py --project [zzzzz-j7d0g-yourprojectuuid] --port 8888</span>
-2015-01-30 11:21:00 arvados.arv-web[4897] INFO: Mounting zzzzz-4zz18-ebohzfbzh82qmqy
-2015-01-30 11:21:01 arvados.arv-web[4897] INFO: Starting Docker container arvados/arv-web
-2015-01-30 11:21:02 arvados.arv-web[4897] INFO: Container id e79e70558d585a3e038e4bfbc97e5c511f21b6101443b29a8017bdf3d84689a3
-2015-01-30 11:21:03 arvados.arv-web[4897] INFO: Waiting for events
-</code></pre>
-</notextile>
-
-The sample application will be available at @http://localhost:8888@.
-
-h3. Updating the application
-
-If you upload a new collection to the same project, arv-web will restart the web service and serve the new collection. For example, uploading a collection containing a "Ruby Rack web app:":https://github.com/rack/rack/wiki
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd arvados/services/arv-web</span>
-~/arvados/services/arv-web$ <span class="userinput">arv-put --project [zzzzz-j7d0g-yourprojectuuid] --name sample-rack-app sample-rack-app</span>
-0M / 0M 100.0%
-Collection saved as 'sample-rack-app'
-zzzzz-4zz18-dhhm0ay8k8cqkvg
-</code></pre>
-</notextile>
-
-@arv-web@ will automatically notice the change, load a new container, and send an update signal (SIGHUP) to the service:
-
-<pre>
-2015-01-30 11:21:03 arvados.arv-web[4897] INFO:Waiting for events
-2015-01-30 11:21:04 arvados.arv-web[4897] INFO:create zzzzz-4zz18-dhhm0ay8k8cqkvg
-2015-01-30 11:21:05 arvados.arv-web[4897] INFO:Mounting zzzzz-4zz18-dhhm0ay8k8cqkvg
-2015-01-30 11:21:06 arvados.arv-web[4897] INFO:Sending refresh signal to container
-2015-01-30 11:21:07 arvados.arv-web[4897] INFO:Waiting for events
-</pre>
-
-h2. Writing your own applications
-
-The @arvados/arv-web@ image serves Python and Ruby applications using Phusion Passenger and Apache @mod_passenger@. See "Phusion Passenger users guide for Apache":https://www.phusionpassenger.com/documentation/Users%20guide%20Apache.html for details, and look at the sample apps @arvados/services/arv-web/sample-wsgi-app@ and @arvados/services/arv-web/sample-rack-app@.
-
-You can serve CGI applications using standard Apache CGI support. See "Apache Tutorial: Dynamic Content with CGI":https://httpd.apache.org/docs/current/howto/cgi.html for details, and look at the sample app @arvados/services/arv-web/sample-cgi-app@.
-
-You can also serve static content from the @public@ directory of the collection. Look at @arvados/services/arv-web/sample-static-page@ for an example. If no @index.html@ is found in @public/@, it will render default Apache index pages, permitting simple browsing of the collection contents.
-
-h3. Custom images
-
-You can provide your own Docker image. The Docker image that will be used create the web application container is specified in the @docker_image@ file in the root of the collection. You can also specify @--image@ on the command @arv-web@ line to choose the docker image (this will override the contents of @docker_image@).
-
-h3. Reloading the web service
-
-Stopping the Docker container and starting it again can result in a small amount of downtime. When the collection containing a new or updated web application uses the same Docker image as the currently running web application, it is possible to avoid this downtime by keeping the existing container and only reloading the web server. This is accomplished by providing a file called @reload@ in the root of the collection, which should contain the commands necessary to reload the web server inside the container.
When you run this command, you may get this API warning:
-notextile. <pre><code>WARNING:root:API lookup failed for collection 204e43b8a1185621ca55a94839582e6f+67108864 (<class 'apiclient.errors.HttpError'>: <HttpError 404 when requesting https://qr1hi.arvadosapi.com/arvados/v1/collections/204e43b8a1185621ca55a94839582e6f%2B67108864?alt=json returned "Not Found">)</code></pre>
+notextile. <pre><code>WARNING:root:API lookup failed for collection 204e43b8a1185621ca55a94839582e6f+67108864 (<class 'apiclient.errors.HttpError'>: <HttpError 404 when requesting https://zzzzz.arvadosapi.com/arvados/v1/collections/204e43b8a1185621ca55a94839582e6f%2B67108864?alt=json returned "Not Found">)</code></pre>
This happens because @arv-get@ tries to find a collection with this identifier. When that fails, it emits this warning, then looks for a datablock instead, which succeeds.
+++ /dev/null
----
-layout: default
-navsection: userguide
-title: "Using GATK with Arvados"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-This tutorial demonstrates how to use the Genome Analysis Toolkit (GATK) with Arvados. In this example we will install GATK and then create a VariantFiltration job to assign pass/fail scores to variants in a VCF file.
-
-{% include 'tutorial_expectations' %}
-
-h2. Installing GATK
-
-Download the GATK binary tarball[1] -- e.g., @GenomeAnalysisTK-2.6-4.tar.bz2@ -- and "copy it to your Arvados VM":{{site.baseurl}}/user/tutorials/tutorial-keep.html.
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv-put GenomeAnalysisTK-2.6-4.tar.bz2</span>
-c905c8d8443a9c44274d98b7c6cfaa32+94
-</code></pre>
-</notextile>
-
-Next, you need the GATK Resource Bundle[2]. This may already be available in Arvados. If not, you will need to download the files listed below and put them into Keep.
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv keep ls -s d237a90bae3870b3b033aea1e99de4a9+10820</span>
- 50342 1000G_omni2.5.b37.vcf.gz
- 1 1000G_omni2.5.b37.vcf.gz.md5
- 464 1000G_omni2.5.b37.vcf.idx.gz
- 1 1000G_omni2.5.b37.vcf.idx.gz.md5
- 43981 1000G_phase1.indels.b37.vcf.gz
- 1 1000G_phase1.indels.b37.vcf.gz.md5
- 326 1000G_phase1.indels.b37.vcf.idx.gz
- 1 1000G_phase1.indels.b37.vcf.idx.gz.md5
- 537210 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.gz
- 1 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.gz.md5
- 3473 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.idx.gz
- 1 CEUTrio.HiSeq.WGS.b37.bestPractices.phased.b37.vcf.idx.gz.md5
- 19403 Mills_and_1000G_gold_standard.indels.b37.vcf.gz
- 1 Mills_and_1000G_gold_standard.indels.b37.vcf.gz.md5
- 536 Mills_and_1000G_gold_standard.indels.b37.vcf.idx.gz
- 1 Mills_and_1000G_gold_standard.indels.b37.vcf.idx.gz.md5
- 29291 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.gz.md5
- 565 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.idx.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.sites.vcf.idx.gz.md5
- 37930 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.gz.md5
- 592 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.idx.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.raw.subset.b37.vcf.idx.gz.md5
-5898484 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam
- 112 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam.bai.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam.bai.gz.md5
- 1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam.md5
- 3837 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.gz.md5
- 65 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.idx.gz
- 1 NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.vcf.idx.gz.md5
- 275757 dbsnp_137.b37.excluding_sites_after_129.vcf.gz
- 1 dbsnp_137.b37.excluding_sites_after_129.vcf.gz.md5
- 3735 dbsnp_137.b37.excluding_sites_after_129.vcf.idx.gz
- 1 dbsnp_137.b37.excluding_sites_after_129.vcf.idx.gz.md5
- 998153 dbsnp_137.b37.vcf.gz
- 1 dbsnp_137.b37.vcf.gz.md5
- 3890 dbsnp_137.b37.vcf.idx.gz
- 1 dbsnp_137.b37.vcf.idx.gz.md5
- 58418 hapmap_3.3.b37.vcf.gz
- 1 hapmap_3.3.b37.vcf.gz.md5
- 999 hapmap_3.3.b37.vcf.idx.gz
- 1 hapmap_3.3.b37.vcf.idx.gz.md5
- 3 human_g1k_v37.dict.gz
- 1 human_g1k_v37.dict.gz.md5
- 2 human_g1k_v37.fasta.fai.gz
- 1 human_g1k_v37.fasta.fai.gz.md5
- 849537 human_g1k_v37.fasta.gz
- 1 human_g1k_v37.fasta.gz.md5
- 1 human_g1k_v37.stats.gz
- 1 human_g1k_v37.stats.gz.md5
- 3 human_g1k_v37_decoy.dict.gz
- 1 human_g1k_v37_decoy.dict.gz.md5
- 2 human_g1k_v37_decoy.fasta.fai.gz
- 1 human_g1k_v37_decoy.fasta.fai.gz.md5
- 858592 human_g1k_v37_decoy.fasta.gz
- 1 human_g1k_v37_decoy.fasta.gz.md5
- 1 human_g1k_v37_decoy.stats.gz
- 1 human_g1k_v37_decoy.stats.gz.md5
-</code></pre>
-</notextile>
-
-h2. Submit a GATK job
-
-The Arvados distribution includes an example crunch script ("crunch_scripts/GATK2-VariantFiltration":https://dev.arvados.org/projects/arvados/repository/revisions/master/entry/crunch_scripts/GATK2-VariantFiltration) that runs the GATK VariantFiltration tool with some default settings.
-
-<notextile>
-<pre><code>~$ <span class="userinput">src_version=76588bfc57f33ea1b36b82ca7187f465b73b4ca4</span>
-~$ <span class="userinput">vcf_input=5ee633fe2569d2a42dd81b07490d5d13+82</span>
-~$ <span class="userinput">gatk_binary=c905c8d8443a9c44274d98b7c6cfaa32+94</span>
-~$ <span class="userinput">gatk_bundle=d237a90bae3870b3b033aea1e99de4a9+10820</span>
-~$ <span class="userinput">cat >the_job <<EOF
-{
- "script":"GATK2-VariantFiltration",
- "repository":"arvados",
- "script_version":"$src_version",
- "script_parameters":
- {
- "input":"$vcf_input",
- "gatk_binary_tarball":"$gatk_binary",
- "gatk_bundle":"$gatk_bundle"
- }
-}
-EOF</span>
-</code></pre>
-</notextile>
-
-* @"input"@ is collection containing the source VCF data. Here we are using an exome report from PGP participant hu34D5B9.
-* @"gatk_binary_tarball"@ is a Keep collection containing the GATK 2 binary distribution[1] tar file.
-* @"gatk_bundle"@ is a Keep collection containing the GATK resource bundle[2].
-
-Now start a job:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv job create --job "$(cat the_job)"</span>
-{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/jobs/qr1hi-8i9sb-n9k7qyp7bs5b9d4",
- "kind":"arvados#job",
- "etag":"9j99n1feoxw3az448f8ises12",
- "uuid":"qr1hi-8i9sb-n9k7qyp7bs5b9d4",
- "owner_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "created_at":"2013-12-17T19:02:15Z",
- "modified_by_client_uuid":"qr1hi-ozdt8-obw7foaks3qjyej",
- "modified_by_user_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "modified_at":"2013-12-17T19:02:15Z",
- "updated_at":"2013-12-17T19:02:15Z",
- "submit_id":null,
- "priority":null,
- "script":"GATK2-VariantFiltration",
- "script_parameters":{
- "input":"5ee633fe2569d2a42dd81b07490d5d13+82",
- "gatk_binary_tarball":"c905c8d8443a9c44274d98b7c6cfaa32+94",
- "gatk_bundle":"d237a90bae3870b3b033aea1e99de4a9+10820"
- },
- "script_version":"76588bfc57f33ea1b36b82ca7187f465b73b4ca4",
- "cancelled_at":null,
- "cancelled_by_client_uuid":null,
- "cancelled_by_user_uuid":null,
- "started_at":null,
- "finished_at":null,
- "output":null,
- "success":null,
- "running":null,
- "is_locked_by_uuid":null,
- "log":null,
- "runtime_constraints":{},
- "tasks_summary":{}
-}
-</code></pre>
-</notextile>
-
-Once the job completes, the output can be found in hu34D5B9-exome-filtered.vcf:
-
-<notextile><pre><code>~$ <span class="userinput">arv keep ls bedd6ff56b3ae9f90d873b1fcb72f9a3+91</span>
-hu34D5B9-exome-filtered.vcf
-</code></pre>
-</notextile>
-
-h2. Notes
-
-fn1. "Download the GATK tools":http://www.broadinstitute.org/gatk/download
-
-fn2. "Information about the GATK resource bundle":http://gatkforums.broadinstitute.org/discussion/1213/whats-in-the-resource-bundle-and-how-can-i-get-it and "direct download link":ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/2.5/b37/ (if prompted, submit an empty password)
+++ /dev/null
----
-layout: default
-navsection: userguide
-title: "Running a Crunch job on the command line"
-...
-{% comment %}
-Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: CC-BY-SA-3.0
-{% endcomment %}
-
-This tutorial introduces how to run individual Crunch jobs using the @arv@ command line tool.
-
-{% include 'tutorial_expectations' %}
-
-You will create a job to run the "hash" Crunch script. The "hash" script computes the MD5 hash of each file in a collection.
-
-h2. Jobs
-
-Crunch pipelines consist of one or more jobs. A "job" is a single run of a specific version of a Crunch script with a specific input. You can also run jobs individually.
-
-A request to run a Crunch job are is described using a JSON object. For example:
-
-<notextile>
-<pre><code>~$ <span class="userinput">cat >~/the_job <<EOF
-{
- "script": "hash",
- "repository": "arvados",
- "script_version": "master",
- "script_parameters": {
- "input": "c1bad4b39ca5a924e481008009d94e32+210"
- },
- "no_reuse": "true"
-}
-EOF
-</code></pre>
-</notextile>
-
-* @cat@ is a standard Unix utility that writes a sequence of input to standard output.
-* @<<EOF@ tells the shell to direct the following lines into the standard input for @cat@ up until it sees the line @EOF@.
-* @>~/the_job@ redirects standard output to a file called @~/the_job@.
-* @"repository"@ is the name of a Git repository to search for the script version. You can access a list of available git repositories on the Arvados Workbench under "*Code repositories*":{{site.arvados_workbench_host}}/repositories.
-* @"script_version"@ specifies the version of the script that you wish to run. This can be in the form of an explicit Git revision hash, a tag, or a branch. Arvados logs the script version that was used in the run, enabling you to go back and re-run any past job with the guarantee that the exact same code will be used as was used in the previous run.
-* @"script"@ specifies the name of the script to run. The script must be given relative to the @crunch_scripts/@ subdirectory of the Git repository.
-* @"script_parameters"@ are provided to the script. In this case, the input is the PGP data Collection that we "put in Keep earlier":{{site.baseurl}}/user/tutorials/tutorial-keep.html.
-* Setting the @"no_reuse"@ flag tells Crunch not to reuse work from past jobs. This helps ensure that you can watch a new Job process for the rest of this tutorial, without reusing output from a past run that you made, or somebody else marked as public. (If you want to experiment, after the first run below finishes, feel free to edit this job to remove the @"no_reuse"@ line and resubmit it. See what happens!)
-
-Use @arv job create@ to actually submit the job. It should print out a JSON object which describes the newly created job:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv job create --job "$(cat ~/the_job)"</span>
-{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/jobs/qr1hi-8i9sb-1pm1t02dezhupss",
- "kind":"arvados#job",
- "etag":"ax3cn7w9whq2hdh983yxvq09p",
- "uuid":"qr1hi-8i9sb-1pm1t02dezhupss",
- "owner_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "created_at":"2013-12-16T20:44:32Z",
- "modified_by_client_uuid":"qr1hi-ozdt8-obw7foaks3qjyej",
- "modified_by_user_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "modified_at":"2013-12-16T20:44:32Z",
- "updated_at":"2013-12-16T20:44:33Z",
- "submit_id":null,
- "priority":null,
- "script":"hash",
- "script_parameters":{
- "input":"c1bad4b39ca5a924e481008009d94e32+210"
- },
- "script_version":"d9cd657b733d578ac0d2167dd75967aa4f22e0ac",
- "cancelled_at":null,
- "cancelled_by_client_uuid":null,
- "cancelled_by_user_uuid":null,
- "started_at":null,
- "finished_at":null,
- "output":null,
- "success":null,
- "running":null,
- "is_locked_by_uuid":null,
- "log":null,
- "runtime_constraints":{},
- "tasks_summary":{}
-}
-</code></pre>
-</notextile>
-
-The job is now queued and will start running as soon as it reaches the front of the queue. Fields to pay attention to include:
-
- * @"uuid"@ is the unique identifier for this specific job.
- * @"script_version"@ is the actual revision of the script used. This is useful if the version was described using the "repository:branch" format.
-
-h2. Monitor job progress
-
-Go to "*Recent jobs*":{{site.arvados_workbench_host}}/jobs in Workbench. Your job should be near the top of the table. This table refreshes automatically. When the job has completed successfully, it will show <span class="label label-success">finished</span> in the *Status* column.
-
-h2. Inspect the job output
-
-On the "Workbench Dashboard":{{site.arvados_workbench_host}}, look for the *Output* column of the *Recent jobs* table. Click on the link under *Output* for your job to go to the files page with the job output. The files page lists all the files that were output by the job. Click on the link under the *file* column to view a file, or click on the download button <span class="glyphicon glyphicon-download-alt"></span> to download the output file.
-
-On the command line, you can use @arv job get@ to access a JSON object describing the output:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv job get --uuid qr1hi-8i9sb-xxxxxxxxxxxxxxx</span>
-{
- "href":"https://qr1hi.arvadosapi.com/arvados/v1/jobs/qr1hi-8i9sb-1pm1t02dezhupss",
- "kind":"arvados#job",
- "etag":"1bk98tdj0qipjy0rvrj03ta5r",
- "uuid":"qr1hi-8i9sb-1pm1t02dezhupss",
- "owner_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "created_at":"2013-12-16T20:44:32Z",
- "modified_by_client_uuid":null,
- "modified_by_user_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "modified_at":"2013-12-16T20:44:55Z",
- "updated_at":"2013-12-16T20:44:55Z",
- "submit_id":null,
- "priority":null,
- "script":"hash",
- "script_parameters":{
- "input":"c1bad4b39ca5a924e481008009d94e32+210"
- },
- "script_version":"d9cd657b733d578ac0d2167dd75967aa4f22e0ac",
- "cancelled_at":null,
- "cancelled_by_client_uuid":null,
- "cancelled_by_user_uuid":null,
- "started_at":"2013-12-16T20:44:36Z",
- "finished_at":"2013-12-16T20:44:53Z",
- "output":"dd755dbc8d49a67f4fe7dc843e4f10a6+54",
- "success":true,
- "running":false,
- "is_locked_by_uuid":"qr1hi-tpzed-9zdpkpni2yddge6",
- "log":"2afdc6c8b67372ffd22d8ce89d35411f+91",
- "runtime_constraints":{},
- "tasks_summary":{
- "done":2,
- "running":0,
- "failed":0,
- "todo":0
- }
-}
-</code></pre>
-</notextile>
-
-* @"output"@ is the unique identifier for this specific job's output. This is a Keep collection. Because the output of Arvados jobs should be deterministic, the known expected output is <code>dd755dbc8d49a67f4fe7dc843e4f10a6+54</code>.
-
-Now you can list the files in the collection:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv keep ls dd755dbc8d49a67f4fe7dc843e4f10a6+54</span>
-./md5sum.txt
-</code></pre>
-</notextile>
-
-This collection consists of the @md5sum.txt@ file. Use @arv-get@ to show the contents of the @md5sum.txt@ file:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv-get dd755dbc8d49a67f4fe7dc843e4f10a6+54/md5sum.txt</span>
-44b8ae3fde7a8a88d2f7ebd237625b4f ./var-GS000016015-ASM.tsv.bz2
-</code></pre>
-</notextile>
-
-This MD5 hash matches the MD5 hash which we "computed earlier":{{site.baseurl}}/user/tutorials/tutorial-keep.html.
-
-h2. The job log
-
-When the job completes, you can access the job log. On the Workbench, visit "*Recent jobs*":{{site.arvados_workbench_host}}/jobs %(rarr)→% your job's UUID under the *uuid* column %(rarr)→% the collection link on the *log* row.
-
-On the command line, the Keep identifier listed in the @"log"@ field from @arv job get@ specifies a collection. You can list the files in the collection:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv keep ls xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91</span>
-./qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt
-</code></pre>
-</notextile>
-
-The log collection consists of one log file named with the job's UUID. You can access it using @arv-get@:
-
-<notextile>
-<pre><code>~$ <span class="userinput">arv-get xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91/qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt</span>
-2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 check slurm allocation
-2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 node compute13 - 8 slots
-2013-12-16_20:44:36 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 start
-2013-12-16_20:44:36 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 Install revision d9cd657b733d578ac0d2167dd75967aa4f22e0ac
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 Clean-work-dir exited 0
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 Install exited 0
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 script hash
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 script_version d9cd657b733d578ac0d2167dd75967aa4f22e0ac
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 script_parameters {"input":"c1bad4b39ca5a924e481008009d94e32+210"}
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 runtime_constraints {"max_tasks_per_node":0}
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 start level 0
-2013-12-16_20:44:37 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 status: 0 done, 0 running, 1 todo
-2013-12-16_20:44:38 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 job_task qr1hi-ot0gb-23c1k3kwrf8da62
-2013-12-16_20:44:38 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 child 7681 started on compute13.1
-2013-12-16_20:44:38 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 status: 0 done, 1 running, 0 todo
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 child 7681 on compute13.1 exit 0 signal 0 success=true
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 success in 1 seconds
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 0 output
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 wait for last 0 children to finish
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 status: 1 done, 0 running, 1 todo
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 start level 1
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 status: 1 done, 0 running, 1 todo
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 job_task qr1hi-ot0gb-iwr0o3unqothg28
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 child 7716 started on compute13.1
-2013-12-16_20:44:39 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 status: 1 done, 1 running, 0 todo
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 child 7716 on compute13.1 exit 0 signal 0 success=true
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 success in 13 seconds
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 1 output dd755dbc8d49a67f4fe7dc843e4f10a6+54
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 wait for last 0 children to finish
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 status: 2 done, 0 running, 0 todo
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 release job allocation
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 Freeze not implemented
-2013-12-16_20:44:52 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 collate
-2013-12-16_20:44:53 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 output dd755dbc8d49a67f4fe7dc843e4f10a6+54+K@qr1hi
-2013-12-16_20:44:53 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 finish
-</code></pre>
-</notextile>
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Arvados repositories are managed through the Git revision control system. You can use these repositories to store your crunch scripts and run them in the arvados cluster.
+Arvados supports managing git repositories. You can access these repositories using your Arvados credentials and share them with other Arvados users.
{% include 'tutorial_expectations' %}
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This tutorial describes how to work with a new Arvados git repository. Working with an Arvados git repository is analogous to working with other public git repositories. It will show you how to upload custom scripts to a remote Arvados repository, so you can use it in Arvados pipelines.
+This tutorial describes how to work with an Arvados-managed git repository. Working with an Arvados git repository is very similar to working with other public git repositories.
{% include 'tutorial_expectations' %}
{% include 'tutorial_git_repo_expectations' %}
-{% include 'notebox_begin' %}
-For more information about using Git, try
-<notextile>
-<pre><code>$ <span class="userinput">man gittutorial</span></code></pre>
-</notextile> or *"search Google for Git tutorials":http://google.com/#q=git+tutorial*.
-{% include 'notebox_end' %}
-
-h2. Cloning an Arvados repository
+h2. Cloning a git repository
Before you start using Git, you should do some basic configuration (you only need to do this the first time):
h2. Adding scripts to an Arvados repository
-Arvados crunch scripts need to be added in a *crunch_scripts* subdirectory in the repository. If this subdirectory does not exist, first create it in the local repository and change to that directory:
-
-<notextile>
-<pre><code>~/tutorial$ <span class="userinput">mkdir crunch_scripts</span>
-~/tutorial$ <span class="userinput">cd crunch_scripts</span></code></pre>
-</notextile>
-
-Next, using @nano@ or your favorite Unix text editor, create a new file called @hash.py@ in the @crunch_scripts@ directory.
-
-notextile. <pre>~/tutorial/crunch_scripts$ <code class="userinput">nano hash.py</code></pre>
-
-Add the following code to compute the MD5 hash of each file in a collection
+A git repository is a good place to store the CWL workflows that you run on Arvados.
-<notextile> {% code 'tutorial_hash_script_py' as python %} </notextile>
+First, create a simple CWL CommandLineTool:
-Make the file executable:
+notextile. <pre>~/tutorials$ <code class="userinput">nano hello.cwl</code></pre>
-notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">chmod +x hash.py</span></code></pre>
+<notextile> {% code 'tutorial_hello_cwl' as yaml %} </notextile>
Next, add the file to the git repository. This tells @git@ that the file should be included on the next commit.
-notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git add hash.py</span></code></pre>
+notextile. <pre><code>~/tutorial$ <span class="userinput">git add hello.cwl</span></code></pre>
Next, commit your changes. All staged changes are recorded into the local git repository:
<notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git commit -m "my first script"</span>
+<pre><code>~/tutorial$ <span class="userinput">git commit -m "my first script"</span>
</code></pre>
</notextile>
</code></pre>
</notextile>
-Although this tutorial shows how to add a python script to Arvados, the same steps can be used to add any of your custom bash, R, or python scripts to an Arvados repository.
+The same steps can be used to add any of your custom bash, R, or python scripts to an Arvados repository.
During its lifetime, a keep collection can be in various states. These states are *persisted*, *expiring*, *trashed* and *permanently deleted*.
+The nominal state is *persisted*, which means the data can be accessed normally and will be retained indefinitely.
+
A collection is *expiring* when it has a *trash_at* time in the future. An expiring collection can be accessed as normal, but is scheduled to be trashed automatically at the *trash_at* time.
A collection is *trashed* when it has a *trash_at* time in the past. The *is_trashed* attribute will also be "true". The delete operation immediately puts the collection in the trash by setting the *trash_at* time to "now". Once trashed, the collection is no longer readable through normal data access APIs. The collection will have *delete_at* set to some time in the future. The trashed collection is recoverable until the delete_at time passes, at which point the collection is permanently deleted.
-# "*Collection lifecycle attributes*":#collection_attributes
# "*Deleting / trashing collections*":#delete-collection
# "*Recovering trashed collections*":#trash-recovery
+# "*Collection lifecycle attributes*":#collection_attributes
{% include 'tutorial_expectations' %}
-h2(#collection_attributes). Collection lifecycle attributes
-
-As listed above the attributes that are used to manage a collection lifecycle are it's *is_trashed*, *trash_at*, and *delete_at*. The table below lists the values of these attributes and how they influence the state of a collection and it's accessibility.
-
-table(table table-bordered table-condensed).
-|_. collection state|_. is_trashed|_. trash_at|_. delete_at|_. get|_. list|_. list?include_trash=true|_. can be modified|
-|persisted collection|false |null |null |yes |yes |yes |yes |
-|expiring collection|false |future |future |yes |yes |yes |yes |
-|trashed collection|true |past |future |no |no |yes |only is_trashed, trash_at and delete_at attribtues|
-|deleted collection|true|past |past |no |no |no |no |
-
h2(#delete-collection). Deleting / trashing collections
A collection can be deleted using either the arv command line tool or the workbench.
+h3. Trashing a collection using workbench
+
+To trash a collection using workbench, go to the Data collections tab in the project, and use the <i class="fa fa-fw fa-trash-o"></i> trash icon for this collection row.
+
h3. Trashing a collection using arv command line tool
<pre>
-arv collection delete --uuid=qr1hi-4zz18-xxxxxxxxxxxxxxx
+arv collection delete --uuid=zzzzz-4zz18-xxxxxxxxxxxxxxx
</pre>
-h3. Trashing a collection using workbench
-
-To trash a collection using workbench, go to the Data collections tab in the project, and use the trash icon for this collection row.
-
h2(#trash-recovery). Recovering trashed collections
A collection can be un-trashed / recovered using either the arv command line tool or the workbench.
+h3. Un-trashing a collection using workbench
+
+To untrash a collection using workbench, go to the trash page by clicking on the "Trash" icon in the top navigation, and use the recycle icon or selection dropdown option.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/trash-button-topnav.png!
+
h3. Un-trashing a collection using arv command line tool
You can list the trashed collections using the list command.
You can then untrash a particular collection using arv with its uuid.
<pre>
-arv collection untrash --uuid=qr1hi-4zz18-xxxxxxxxxxxxxxx
+arv collection untrash --uuid=zzzzz-4zz18-xxxxxxxxxxxxxxx
</pre>
-h3. Un-trashing a collection using workbench
+h2(#collection_attributes). Collection lifecycle attributes
-To untrash a collection using workbench, go to trash page on workbench by clicking on the "Trash" icon in the top navigation in workbench and use the recycle icon or selection dropdown option.
+As listed above, the attributes that are used to manage a collection lifecycle are its *is_trashed*, *trash_at*, and *delete_at*. The table below lists the values of these attributes and how they influence the state of a collection and its accessibility.
-!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/trash-button-topnav.png!
+table(table table-bordered table-condensed).
+|_. collection state|_. is_trashed|_. trash_at|_. delete_at|_. get|_. list|_. list?include_trash=true|_. can be modified|
+|persisted collection|false |null |null |yes |yes |yes |yes |
+|expiring collection|false |future |future |yes |yes |yes |yes |
+|trashed collection|true |past |future |no |no |yes |only is_trashed, trash_at and delete_at attributes|
+|deleted collection|true|past |past |no |no |no |no |
Arvados Data collections can be downloaded using either the arv commands or using Workbench.
-# "*Downloading using arv commands*":#download-using-arv
-# "*Downloading using Workbench*":#download-using-workbench
-# "*Downloading a shared collection using Workbench*":#download-shared-collection
+# "*Download using Workbench*":#download-using-workbench
+# "*Sharing collections*":#download-shared-collection
+# "*Download using command line tools*":#download-using-arv
-h2(#download-using-arv). Downloading using arv commands
+h2(#download-using-workbench). Download using Workbench
+
+You can also download Arvados data collections using the Workbench.
+
+Visit the Workbench *Dashboard*. Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project. You will see the *Data collections* tab, which lists the collections in this project.
+
+You can access the contents of a collection by clicking on the *<i class="fa fa-fw fa-archive"></i> Show* button next to the collection. This will take you to the collection's page. Using this page you can see the collection's contents, and download individual files.
+
+You can now download the collection files by clicking on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> button(s).
+
+h2(#download-shared-collection). Sharing collections
+
+h3. Sharing with other Arvados users
+
+Collections can be shared with other users on the Arvados cluster by sharing the parent project. Navigate to the parent project using the "breadcrumbs" bar, then click on the *Sharing* tab. From the sharing tab, you can choose which users or groups to share with, and their level of access.
+
+h3. Creating a special download URL
+
+To share a collection with users that do not have an account on your Arvados cluster, visit the collection page using Workbench as described in the above section. Once on this page, click on the <span class="btn btn-sm btn-primary" >Create sharing link</span> button.
+
+This will create a sharing link for the collection as shown below. You can copy the sharing link in this page and share it with other users.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/shared-collection.png!
+
+A user with this URL can download this collection by simply accessing it using a browser. It will present a downloadable version of the collection as shown below.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/download-shared-collection.png!
+
+h2(#download-using-arv). Download using command line tools
{% include 'tutorial_expectations' %}
Use @arv-ls@ to view the contents of a collection:
<notextile>
-<pre><code>~$ <span class="userinput">arv-ls c1bad4b39ca5a924e481008009d94e32+210</span>
-var-GS000016015-ASM.tsv.bz2
+<pre><code>~$ <span class="userinput">arv-ls ae480c5099b81e17267b7445e35b4bc7+180</span>
+./HWI-ST1027_129_D0THKACXX.1_1.fastq
+./HWI-ST1027_129_D0THKACXX.1_2.fastq
</code></pre>
-<pre><code>~$ <span class="userinput">arv-ls 887cd41e9c613463eab2f0d885c6dd96+83</span>
-alice.txt
-bob.txt
-carol.txt
-</code></pre>
-</notextile>
-
-Use @-s@ to print file sizes rounded up to the nearest kilobyte:
+Use @-s@ to print file sizes, in kilobytes, rounded up:
<notextile>
-<pre><code>~$ <span class="userinput">arv-ls -s c1bad4b39ca5a924e481008009d94e32+210</span>
-221887 var-GS000016015-ASM.tsv.bz2
+<pre><code>~$ <span class="userinput">arv-ls -s ae480c5099b81e17267b7445e35b4bc7+180</span>
+ 12258 ./HWI-ST1027_129_D0THKACXX.1_1.fastq
+ 12258 ./HWI-ST1027_129_D0THKACXX.1_2.fastq
</code></pre>
</notextile>
Use @arv-get@ to download the contents of a collection and place it in the directory specified in the second argument (in this example, @.@ for the current directory):
<notextile>
-<pre><code>~$ <span class="userinput">arv-get c1bad4b39ca5a924e481008009d94e32+210/ .</span>
-~$ <span class="userinput">ls var-GS000016015-ASM.tsv.bz2</span>
-var-GS000016015-ASM.tsv.bz2
+<pre><code>~$ <span class="userinput">$ arv-get ae480c5099b81e17267b7445e35b4bc7+180/ .</span>
+23 MiB / 23 MiB 100.0%
+~$ <span class="userinput">ls</span>
+HWI-ST1027_129_D0THKACXX.1_1.fastq HWI-ST1027_129_D0THKACXX.1_2.fastq
</code></pre>
</notextile>
You can also download individual files:
<notextile>
-<pre><code>~$ <span class="userinput">arv-get 887cd41e9c613463eab2f0d885c6dd96+83/alice.txt .</span>
+<pre><code>~$ <span class="userinput">arv-get ae480c5099b81e17267b7445e35b4bc7+180/HWI-ST1027_129_D0THKACXX.1_1.fastq .</span>
+11 MiB / 11 MiB 100.0%
</code></pre>
</notextile>
If you request a collection by portable data hash, it will first search the home cluster, then search federated clusters.
-You may also request a collection by UUID. In this case, it will contact the cluster named in the UUID prefix (in this example, @qr1hi@).
+You may also request a collection by UUID. In this case, it will contact the cluster named in the UUID prefix (in this example, @zzzzz@).
<notextile>
-<pre><code>~$ <span class="userinput">arv-get qr1hi-4zz18-fw6dnjxtkvzdewt/ .</span>
+<pre><code>~$ <span class="userinput">arv-get zzzzz-4zz18-fw6dnjxtkvzdewt/ .</span>
</code></pre>
</notextile>
-
-h2(#download-using-workbench). Downloading using Workbench
-
-You can also download Arvados data collections using the Workbench.
-
-Visit the Workbench *Dashboard*. Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project. You will see the *Data collections* tab, which lists the collections in this project.
-
-You can access the contents of a collection by clicking on the *<i class="fa fa-fw fa-archive"></i> Show* button next to the collection. This will take you to the collection's page. Using this page you can see the collection's contents, download individual files, and set sharing options.
-
-You can now download the collection files by clicking on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> button(s).
-
-h2(#download-shared-collection). Downloading a shared collection using Workbench
-
-Collections can be shared to allow downloads by anonymous users.
-
-To share a collection with anonymous users, visit the collection page using Workbench as described in the above section. Once on this page, click on the <span class="btn btn-sm btn-primary" >Create sharing link</span> button.
-
-This will create a sharing link for the collection as shown below. You can copy the sharing link in this page and share it with other users.
-
-!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/shared-collection.png!
-
-A user with this url can download this collection by simply accessing this url using browser. It will present a downloadable version of the collection as shown below.
-
-!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/download-shared-collection.png!
---
layout: default
navsection: userguide
-title: "Accessing Keep from GNU/Linux"
+title: "Access Keep as a GNU/Linux filesystem"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This tutoral describes how to access Arvados collections on GNU/Linux using traditional filesystem tools by mounting Keep as a file system using @arv-mount@.
+GNU/Linux users can use @arv-mount@ or Gnome to mount Keep as a file system in order to access Arvados collections using traditional filesystem tools.
{% include 'tutorial_expectations' %}
-h2. Arv-mount
+# "*Mounting at the command line with arv-mount*":#arv-mount
+# "*Mounting in Gnome File manager*":#gnome
-@arv-mount@ provides several features:
+h2(#arv-mount). Arv-mount
-* You can browse, open and read Keep entries as if they are regular files.
-* It is easy for existing tools to access files in Keep.
-* Data is streamed on demand. It is not necessary to download an entire file or collection to start processing.
+@arv-mount@ provides a file system view of Arvados Keep using File System in Userspace (FUSE). You can browse, open and read Keep entries as if they are regular files, and existing tools can access files in Keep. Data is streamed on demand. It is not necessary to download an entire file or collection to start processing.
The default mode permits browsing any collection in Arvados as a subdirectory under the mount directory. To avoid having to fetch a potentially large list of all collections, collection directories only come into existence when explicitly accessed by UUID or portable data hash. For instance, a collection may be found by its content hash in the @keep/by_id@ directory.
If multiple clients (separate instances of arv-mount or other arvados applications) modify the same file in the same collection within a short time interval, this may result in a conflict. In this case, the most recent commit wins, and the "loser" will be renamed to a conflict file in the form @name~YYYYMMDD-HHMMSS~conflict~@.
Please note this feature is in beta testing. In particular, the conflict mechanism is itself currently subject to race conditions with potential for data loss when a collection is being modified simultaneously by multiple clients. This issue will be resolved in future development.
+
+h2(#gnome). Mounting in Gnome File manager
+
+As an alternative to @arv-mount@ you can also access the WebDAV mount through the Gnome File manager.
+
+# Open "Files"
+# On the left sidebar, click on "Other Locations"
+# At the bottom of the window, enter @davs://collections.ClusterID.example.com/@. When prompted for credentials, enter username "arvados" and a valid Arvados token in the @Password@ field.
---
layout: default
navsection: userguide
-title: "Accessing Keep from OS X"
+title: "Access Keep from macOS Finder"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-OS X users can browse Keep read-only via WebDAV. Specific collections can also be accessed read-write via WebDAV.
+Users of macOS can browse Keep read-only via WebDAV. Specific collections can also be accessed read-write via WebDAV.
-h3. Browsing Keep (read-only)
+h3. Browsing Keep in Finder (read-only)
-In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/@ in popup dialog. When prompted for credentials, put a valid Arvados token in the @Password@ field and anything in the Name field (it will be ignored by Arvados).
+In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/@ in the popup dialog. When prompted for credentials, enter username "arvados" and paste a valid Arvados token in the @Password@ field.
This mount is read-only. Write support for the @/users/@ directory is planned for a future release.
h3. Accessing a specific collection in Keep (read-write)
-In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/@ in popup dialog. When prompted for credentials, put a valid Arvados token in the @Password@ field and anything in the Name field (it will be ignored by Arvados).
+In Finder, use "Connect to Server..." under the "Go" menu and enter @https://collections.ClusterID.example.com/c=your-collection-uuid@ in popup dialog. When prompted for credentials, put a valid Arvados token in the @Password@ field and anything in the Name field (it will be ignored by Arvados).
This collection is now accessible read/write.
---
layout: default
navsection: userguide
-title: "Accessing Keep from Windows"
+title: "Access Keep from Windows File Explorer"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
Windows users can browse Keep read-only via WebDAV. Specific collections can also be accessed read-write via WebDAV.
-h3. Browsing Keep (read-only)
+h3. Browsing Keep in File Explorer (read-only)
Use the 'Map network drive' functionality, and enter @https://collections.ClusterID.example.com/@ in the Folder field. When prompted for credentials, you can fill in an arbitrary string for @Username@, it is ignored by Arvados. Windows will not accept an empty @Username@. Put a valid Arvados token in the @Password@ field.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Arvados Data collections can be uploaded using either the @arv-put@ command line tool or using Workbench.
+Arvados Data collections can be uploaded using either Workbench or the @arv-put@ command line tool.
-# "*Upload using command line tool*":#upload-using-command
# "*Upload using Workbench*":#upload-using-workbench
+# "*Creating projects*":#creating-projects
+# "*Upload using command line tool*":#upload-using-command
+
+h2(#upload-using-workbench). Upload using Workbench
+
+To upload using Workbench, visit the Workbench *Dashboard*. Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu and select your *Home* project or any other project of your choosing. You will see the *Data collections* tab for this project, which lists the collections in this project.
+
+To upload files into a new collection, click on *Add data*<span class="caret"></span> dropdown menu and select *Upload files from my computer*.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-using-workbench.png!
+
+<br/>This will create a new empty collection in your chosen project and will take you to the *Upload* tab for that collection.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-tab-in-new-collection.png!
+
+Click on the *Browse...* button and select the files you would like to upload. Selected files will be added to a list of files to be uploaded. After you are done selecting files to upload, click on the *<i class="fa fa-fw fa-play"></i> Start* button to start upload. This will start uploading files to Arvados and Workbench will show you the progress bar. When upload is completed, you will see an indication to that effect.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/files-uploaded.png!
+
+*Note:* If you leave the collection page during the upload, the upload process will be aborted and you will need to upload the files again.
+
+*Note:* You can also use the Upload tab to add additional files to an existing collection.
notextile. <div class="spaced-out">
+h2(#creating-projects). Creating projects
+
+Files are organized into Collections, and Collections are organized by Projects.
+
+Click on *Projects*<span class="caret"></span> <span class="rarr">→</span> <i class="fa fa-fw fa-plus"></i>*Add a new project* to add a top level project.
+
+To create a subproject, navigate to the parent project, and click on <i class="fa fa-fw fa-plus"></i>*Add a subproject*.
+
+See "Sharing collections":tutorial-keep-get.html#download-shared-collection for information about sharing projects and collections with other users.
+
h2(#upload-using-command). Upload using command line tool
{% include 'tutorial_expectations' %}
<pre><code>~$ <span class="userinput">arv-put var-GS000016015-ASM.tsv.bz2</span>
216M / 216M 100.0%
Collection saved as ...
-qr1hi-4zz18-xxxxxxxxxxxxxxx
+zzzzz-4zz18-xxxxxxxxxxxxxxx
</code></pre>
</notextile>
-The output value @qr1hi-4zz18-xxxxxxxxxxxxxxx@ is the uuid of the Arvados collection created.
+The output value @zzzzz-4zz18-xxxxxxxxxxxxxxx@ is the uuid of the Arvados collection created.
Note: The file used in this example is a freely available TSV file containing variant annotations from the "Personal Genome Project (PGP)":http://www.pgp-hms.org participant "hu599905":https://my.pgp-hms.org/profile/hu599905), downloadable "here":https://warehouse.pgp-hms.org/warehouse/f815ec01d5d2f11cb12874ab2ed50daa+234+K@ant/var-GS000016015-ASM.tsv.bz2. Alternatively, you can replace @var-GS000016015-ASM.tsv.bz2@ with the name of any file you have locally, or you could get the TSV file by "downloading it from Keep.":{{site.baseurl}}/user/tutorials/tutorial-keep-get.html
~$ <span class="userinput">arv-put tmp</span>
0M / 0M 100.0%
Collection saved as ...
-qr1hi-4zz18-yyyyyyyyyyyyyyy
+zzzzz-4zz18-yyyyyyyyyyyyyyy
</code></pre>
</notextile>
Click on the *<i class="fa fa-fw fa-archive"></i> Show* button next to the collection's listing on a project page to go to the Workbench page for your collection. On this page, you can see the collection's contents, download individual files, and set sharing options.
notextile. </div>
-
-h2(#upload-using-workbench). Upload using Workbench
-
-To upload using Workbench, visit the Workbench *Dashboard*. Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu and select your *Home* project or any other project of your choosing. You will see the *Data collections* tab for this project, which lists the collections in this project.
-
-To upload files into a new collection, click on *Add data*<span class="caret"></span> dropdown menu and select *Upload files from my computer*.
-
-!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-using-workbench.png!
-
-<br/>This will create a new empty collection in your chosen project and will take you to the *Upload* tab for that collection.
-
-!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/upload-tab-in-new-collection.png!
-
-Click on the *Browse...* button and select the files you would like to upload. Selected files will be added to a list of files to be uploaded. After you are done selecting files to upload, click on the *<i class="fa fa-fw fa-play"></i> Start* button to start upload. This will start uploading files to Arvados and Workbench will show you the progress bar. When upload is completed, you will see an indication to that effect.
-
-!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/files-uploaded.png!
-
-*Note:* If you leave the collection page during the upload, the upload process will be aborted and you will need to upload the files again.
-
-*Note:* You can also use the Upload tab to add additional files to an existing collection.
# Start from the *Workbench Dashboard*. You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
# Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button. This will open a dialog box titled *Choose a pipeline or workflow to run*.
-# In the search box, type in *Tutorial bwa mem cwl*.
-# Select *<i class="fa fa-fw fa-gear"></i> Tutorial bwa mem cwl* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button. This will create a new process in your *Home* project and will open it. You can now supply the inputs for the process. Please note that all required inputs are populated with default values and you can change them if you prefer.
-# For example, let's see how to change *"reference" parameter* for this workflow. Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath the *"reference" parameter* header. This will open a dialog box titled *Choose a dataset for "reference" parameter for cwl-runner in bwa-mem.cwl component*.
-# Open the *Home <span class="caret"></span>* menu and select *All Projects*. Search for and select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference*. You will then see a list of files. Select *<i class="fa fa-fw fa-file"></i> 19-fasta.bwt* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
-# Repeat the previous two steps to set the *"read_p1" parameter for cwl-runner script in bwa-mem.cwl component* and *"read_p2" parameter for cwl-runner script in bwa-mem.cwl component* parameters.
-# Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button. The page updates to show you that the process has been submitted to run on the Arvados cluster.
-# After the process starts running, you can track the progress by watching log messages from the component(s). This page refreshes automatically. You will see a <span class="label label-success">complete</span> label when the process completes successfully.
+# In the search box, type in *bwa-mem.cwl*.
+# Select *<i class="fa fa-fw fa-gear"></i> bwa-mem.cwl* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button. This will create a new process in your *Home* project and will open it. You can now supply the inputs for the process. Please note that all required inputs are populated with default values and you can change them if you prefer.
+# For example, let's see how to set read pair *read_p1* and *read_p2* for this workflow. Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath the *read_p1* header. This will open a dialog box titled *Choose a file*.
+# In the file dialog, click on *Home <span class="caret"></span>* menu and then select *All Projects*.
+# Enter *HWI-ST1027* into the search box. You will see one or more collections. Click on *<i class="fa fa-fw fa-archive"></i> HWI-ST1027_129_D0THKACXX for CWL tutorial*.
+# The right hand panel will list two files. Click on the first one ending in "_1" and click the <span class="btn btn-sm btn-primary" >OK</span> button.
+# Repeat steps 5-8 to set the *read_p2* parameter, except select the second file, ending in "_2".
+# Scroll to the bottom of the "Inputs" panel and click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button. The page updates to show you that the process has been submitted to run on the Arvados cluster.
+# Once the process starts running, you can track the progress by watching log messages from the component(s). This page refreshes automatically. You will see a <span class="label label-success">complete</span> label when the process completes successfully.
# Click on the *Output* link to see the results of the process. This will load a new page listing the output files from this process. You'll see the output SAM file from the alignment tool under the *Files* tab.
# Click on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> download button to the right of the SAM file to download your results.
---
layout: default
navsection: userguide
-title: "Writing a CWL workflow"
+title: "Developing workflows with CWL"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
h2. Developing workflows
-For an introduction and and detailed documentation about writing CWL, see the "CWL User Guide":https://www.commonwl.org/user_guide and the "CWL Specification":http://commonwl.org/v1.1 .
+For an introduction and detailed documentation about writing CWL, see the "CWL User Guide":https://www.commonwl.org/user_guide and the "CWL Specification":http://commonwl.org/v1.2 .
See "Writing Portable High-Performance Workflows":{{site.baseurl}}/user/cwl/cwl-style.html and "Arvados CWL Extensions":{{site.baseurl}}/user/cwl/cwl-extensions.html for additional information about using CWL on Arvados.
See "Software for working with CWL":https://www.commonwl.org/#Software_for_working_with_CWL for links to software tools to help create CWL documents.
-h2. Using Composer
-
-You can create new workflows in the browser using "Arvados Composer":{{site.baseurl}}/user/composer/composer.html
-
-h2. Registering a workflow to use in Workbench
-
-Use @--create-workflow@ to register a CWL workflow with Arvados. This enables you to share workflows with other Arvados users, and run them by clicking the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button on the Workbench Dashboard and on the command line by UUID.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to qr1hi-4zz18-7e0hedrmkuyoei3
-2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template qr1hi-p5p6p-rjleou1dwr167v5
-qr1hi-p5p6p-rjleou1dwr167v5
-</code></pre>
-</notextile>
-
-You can provide a partial input file to set default values for the workflow input parameters. You can also use the @--name@ option to set the name of the workflow:
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --name "My workflow with defaults" --create-workflow bwa-mem.cwl bwa-mem-template.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to qr1hi-4zz18-0f91qkovk4ml18o
-2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template qr1hi-p5p6p-0deqe6nuuyqns2i
-qr1hi-p5p6p-zuniv58hn8d0qd8
-</code></pre>
-</notextile>
-
-h3. Running registered workflows at the command line
-
-You can run a registered workflow at the command line by its UUID:
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner qr1hi-p5p6p-zuniv58hn8d0qd8 --help</span>
-/home/peter/work/scripts/venv/bin/arvados-cwl-runner 0d62edcb9d25bf4dcdb20d8872ea7b438e12fc59 1.0.20161209192028, arvados-python-client 0.1.20161212125425, cwltool 1.0.20161207161158
-Resolved 'qr1hi-p5p6p-zuniv58hn8d0qd8' to 'keep:655c6cd07550151b210961ed1d3852cf+57/bwa-mem.cwl'
-usage: qr1hi-p5p6p-zuniv58hn8d0qd8 [-h] [--PL PL] --group_id GROUP_ID
- --read_p1 READ_P1 [--read_p2 READ_P2]
- [--reference REFERENCE] --sample_id
- SAMPLE_ID
- [job_order]
-
-positional arguments:
- job_order Job input json file
-
-optional arguments:
- -h, --help show this help message and exit
- --PL PL
- --group_id GROUP_ID
- --read_p1 READ_P1 The reads, in fastq format.
- --read_p2 READ_P2 For mate paired reads, the second file (optional).
- --reference REFERENCE
- The index files produced by `bwa index`
- --sample_id SAMPLE_ID
-</code></pre>
-</notextile>
-
h2. Using cwltool
When developing a workflow, it is often helpful to run it on the local host to avoid the overhead of submitting to the cluster. To execute a workflow only on the local host (without submitting jobs to an Arvados cluster) you can use the @cwltool@ command. Note that when using @cwltool@ you must have the input data accessible on the local file system using either @arv-mount@ or @arv-get@ to fetch the data from Keep.
</notextile>
If you get the error @JavascriptException: Long-running script killed after 20 seconds.@ this may be due to the Dockerized Node.js engine taking too long to start. You may address this by installing Node.js locally (run @apt-get install nodejs@ on Debian or Ubuntu) or by specifying a longer timeout with the @--eval-timeout@ option. For example, run the workflow with @cwltool --eval-timeout=40@ for a 40-second timeout.
-
-h2. Making workflows directly executable
-
-You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file:
-
-<notextile>
-<pre><code>#!/usr/bin/env cwl-runner
-</code></pre>
-</notextile>
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem.cwl bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
-{
- "aligned_sam": {
- "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
- "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
- "class": "File",
- "size": 30738986
- }
-}
-</code></pre>
-</notextile>
-
-You can even make an input file directly executable the same way with the following two lines at the top:
-
-<notextile>
-<pre><code>#!/usr/bin/env cwl-runner
-cwl:tool: <span class="userinput">bwa-mem.cwl</span>
-</code></pre>
-</notextile>
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
-{
- "aligned_sam": {
- "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
- "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
- "class": "File",
- "size": 30738986
- }
-}
-</code></pre>
-</notextile>
require (
github.com/AdRoll/goamz v0.0.0-20170825154802-2731d20f46f4
- github.com/Azure/azure-sdk-for-go v19.1.0+incompatible
- github.com/Azure/go-autorest v10.15.2+incompatible
+ github.com/Azure/azure-sdk-for-go v45.1.0+incompatible
+ github.com/Azure/go-autorest v14.2.0+incompatible
+ github.com/Azure/go-autorest/autorest v0.11.3
+ github.com/Azure/go-autorest/autorest/azure/auth v0.5.1
+ github.com/Azure/go-autorest/autorest/to v0.4.0
+ github.com/Azure/go-autorest/autorest/validation v0.3.0 // indirect
github.com/Microsoft/go-winio v0.4.5 // indirect
github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7 // indirect
github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239 // indirect
github.com/bradleypeabody/godap v0.0.0-20170216002349-c249933bc092
github.com/coreos/go-oidc v2.1.0+incompatible
github.com/coreos/go-systemd v0.0.0-20180108085132-cc4f39464dc7
- github.com/dgrijalva/jwt-go v3.1.0+incompatible // indirect
- github.com/dimchansky/utfbom v1.0.0 // indirect
github.com/dnaeon/go-vcr v1.0.1 // indirect
github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible // indirect
github.com/docker/docker v1.4.2-0.20180109013817-94b8a116fbf1
github.com/kevinburke/ssh_config v0.0.0-20171013211458-802051befeb5 // indirect
github.com/lib/pq v1.3.0
github.com/marstr/guid v1.1.1-0.20170427235115-8bdf7d1a087c // indirect
- github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 // indirect
github.com/msteinert/pam v0.0.0-20190215180659-f29b9f28d6f9
github.com/opencontainers/go-digest v1.0.0-rc1 // indirect
github.com/opencontainers/image-spec v1.0.1-0.20171125024018-577479e4dc27 // indirect
github.com/sergi/go-diff v1.0.0 // indirect
github.com/sirupsen/logrus v1.4.2
github.com/src-d/gcfg v1.3.0 // indirect
- github.com/stretchr/testify v1.4.0 // indirect
github.com/xanzy/ssh-agent v0.1.0 // indirect
- golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550
+ golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9
golang.org/x/net v0.0.0-20200202094626-16171245cfb2
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45
golang.org/x/sys v0.0.0-20191105231009-c1f44814a5cd
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0 h1:ROfEUZz+Gh5pa62DJWXSaonyu3StP6EA6lPEXPI6mCo=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
+github.com/Azure/azure-sdk-for-go v0.2.0-beta h1:wYBqYNMWr0WL2lcEZi+dlK9n+N0wJ0Pjs4BKeOnDjfQ=
github.com/Azure/azure-sdk-for-go v19.1.0+incompatible h1:ysqLW+tqZjJWOTE74heH/pDRbr4vlN3yV+dqQYgpyxw=
github.com/Azure/azure-sdk-for-go v19.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v20.2.0+incompatible h1:La3ODnagAOf5ZFUepTfVftvNTdxkq06DNpgi1l0yaM0=
+github.com/Azure/azure-sdk-for-go v20.2.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/azure-sdk-for-go v45.1.0+incompatible h1:kxtaPD8n2z5Za+9e3sKsYG2IX6PG2R6VXtgS7gAbh3A=
+github.com/Azure/azure-sdk-for-go v45.1.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
+github.com/Azure/go-autorest v1.1.1 h1:4G9tVCqooRY3vDTB2bA1Z01PlSALtnUbji0AfzthUSs=
github.com/Azure/go-autorest v10.15.2+incompatible h1:oZpnRzZie83xGV5txbT1aa/7zpCPvURGhV6ThJij2bs=
github.com/Azure/go-autorest v10.15.2+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
+github.com/Azure/go-autorest v14.2.0+incompatible h1:V5VMDjClD3GiElqLWO7mz2MxNAK/vTfRHdAubSIPRgs=
+github.com/Azure/go-autorest v14.2.0+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
+github.com/Azure/go-autorest/autorest v0.11.0/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
+github.com/Azure/go-autorest/autorest v0.11.3 h1:fyYnmYujkIXUgv88D9/Wo2ybE4Zwd/TmQd5sSI5u2Ws=
+github.com/Azure/go-autorest/autorest v0.11.3/go.mod h1:JFgpikqFJ/MleTTxwepExTKnFUKKszPS8UavbQYUMuw=
+github.com/Azure/go-autorest/autorest/adal v0.9.0/go.mod h1:/c022QCutn2P7uY+/oQWWNcK9YU+MH96NgK+jErpbcg=
+github.com/Azure/go-autorest/autorest/adal v0.9.2 h1:Aze/GQeAN1RRbGmnUJvUj+tFGBzFdIg3293/A9rbxC4=
+github.com/Azure/go-autorest/autorest/adal v0.9.2/go.mod h1:/3SMAM86bP6wC9Ev35peQDUeqFZBMH07vvUOmg4z/fE=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.1 h1:bvUhZciHydpBxBmCheUgxxbSwJy7xcfjkUsjUcqSojc=
+github.com/Azure/go-autorest/autorest/azure/auth v0.5.1/go.mod h1:ea90/jvmnAwDrSooLH4sRIehEPtG/EPUXavDh31MnA4=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.0 h1:Ml+UCrnlKD+cJmSzrZ/RDcDw86NjkRUpnFh7V5JUhzU=
+github.com/Azure/go-autorest/autorest/azure/cli v0.4.0/go.mod h1:JljT387FplPzBA31vUcvsetLKF3pec5bdAxjVU4kI2s=
+github.com/Azure/go-autorest/autorest/date v0.3.0 h1:7gUk1U5M/CQbp9WoqinNzJar+8KY+LPI6wiWrP/myHw=
+github.com/Azure/go-autorest/autorest/date v0.3.0/go.mod h1:BI0uouVdmngYNUzGWeSYnokU+TrmwEsOqdt8Y6sso74=
+github.com/Azure/go-autorest/autorest/mocks v0.4.0/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/mocks v0.4.1/go.mod h1:LTp+uSrOhSkaKrUy935gNZuuIPPVsHlr9DSOxSayd+k=
+github.com/Azure/go-autorest/autorest/to v0.4.0 h1:oXVqrxakqqV1UZdSazDOPOLvOIz+XA683u8EctwboHk=
+github.com/Azure/go-autorest/autorest/to v0.4.0/go.mod h1:fE8iZBn7LQR7zH/9XU2NcPR4o9jEImooCeWJcYV/zLE=
+github.com/Azure/go-autorest/autorest/validation v0.3.0 h1:3I9AAI63HfcLtphd9g39ruUwRI+Ca+z/f36KHPFRUss=
+github.com/Azure/go-autorest/autorest/validation v0.3.0/go.mod h1:yhLgjC0Wda5DYXl6JAsWyUe4KVNffhoDhG0zVzUMo3E=
+github.com/Azure/go-autorest/logger v0.2.0 h1:e4RVHVZKC5p6UANLJHkM4OfR1UKZPj8Wt8Pcx+3oqrE=
+github.com/Azure/go-autorest/logger v0.2.0/go.mod h1:T9E3cAhj2VqvPOtCYAvby9aBXkZmbF5NWuPV8+WeEW8=
+github.com/Azure/go-autorest/tracing v0.6.0 h1:TYi4+3m5t6K48TGI9AUdb+IzbnSxvnvUMfuitfgcfuo=
+github.com/Azure/go-autorest/tracing v0.6.0/go.mod h1:+vhtPC754Xsa23ID7GlGsrdKBpUA79WCAKPPZVC2DeU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Microsoft/go-winio v0.4.5 h1:U2XsGR5dBg1yzwSEJoP2dE2/aAXpmad+CNG2hE9Pd5k=
github.com/Microsoft/go-winio v0.4.5/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgrijalva/jwt-go v3.1.0+incompatible h1:FFziAwDQQ2dz1XClWMkwvukur3evtZx7x/wMHKM1i20=
github.com/dgrijalva/jwt-go v3.1.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
+github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dimchansky/utfbom v1.0.0 h1:fGC2kkf4qOoKqZ4q7iIh+Vef4ubC1c38UDsEyZynZPc=
github.com/dimchansky/utfbom v1.0.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
+github.com/dimchansky/utfbom v1.1.0 h1:FcM3g+nofKgUteL8dm/UpdRXNC9KmADgTpLKsu0TRo4=
+github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQvIirEdv+8=
github.com/dnaeon/go-vcr v1.0.1 h1:r8L/HqC0Hje5AXMu1ooW8oyQyOFv4GxqpL0nRP7SLLY=
github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E=
github.com/docker/distribution v2.6.0-rc.1.0.20180105232752-277ed486c948+incompatible h1:PVtvnmmxSMUcT5AY6vG7sCCzRg3eyoW6vQvXtITC60c=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747 h1:eQox4Rh4ewJF+mqYPxCkmBAirRnPaHEB26UkNuPyjlk=
github.com/mitchellh/go-homedir v0.0.0-20161203194507-b8bc1bf76747/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
+github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
+github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9 h1:psW17arqaxU48Z5kZ0CQnkZWQJsqcURM6tKiBApRjXI=
+golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
}
if len(svc.InternalURLs) == 0 {
svc.InternalURLs = map[arvados.URL]arvados.ServiceInstance{
- arvados.URL{Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: arvados.ServiceInstance{},
+ {Scheme: "http", Host: fmt.Sprintf("%s:%s", super.ListenHost, nextPort(super.ListenHost)), Path: "/"}: {},
}
}
}
"context"
"encoding/base64"
"encoding/json"
+ "errors"
"fmt"
"net/http"
"regexp"
"git.arvados.org/arvados.git/lib/cloud"
"git.arvados.org/arvados.git/sdk/go/arvados"
- "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
+ "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
storageacct "github.com/Azure/azure-sdk-for-go/services/storage/mgmt/2018-02-01/storage"
"github.com/Azure/azure-sdk-for-go/storage"
var Driver = cloud.DriverFunc(newAzureInstanceSet)
type azureInstanceSetConfig struct {
- SubscriptionID string
- ClientID string
- ClientSecret string
- TenantID string
- CloudEnvironment string
- ResourceGroup string
- Location string
- Network string
- NetworkResourceGroup string
- Subnet string
- StorageAccount string
- BlobContainer string
- DeleteDanglingResourcesAfter arvados.Duration
- AdminUsername string
+ SubscriptionID string
+ ClientID string
+ ClientSecret string
+ TenantID string
+ CloudEnvironment string
+ ResourceGroup string
+ ImageResourceGroup string
+ Location string
+ Network string
+ NetworkResourceGroup string
+ Subnet string
+ StorageAccount string
+ BlobContainer string
+ SharedImageGalleryName string
+ SharedImageGalleryImageVersion string
+ DeleteDanglingResourcesAfter arvados.Duration
+ AdminUsername string
}
type containerWrapper interface {
return r, wrapAzureError(err)
}
+type disksClientWrapper interface {
+ listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error)
+ delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error)
+}
+
+type disksClientImpl struct {
+ inner compute.DisksClient
+}
+
+func (cl *disksClientImpl) listByResourceGroup(ctx context.Context, resourceGroupName string) (result compute.DiskListPage, err error) {
+ r, err := cl.inner.ListByResourceGroup(ctx, resourceGroupName)
+ return r, wrapAzureError(err)
+}
+
+func (cl *disksClientImpl) delete(ctx context.Context, resourceGroupName string, diskName string) (result compute.DisksDeleteFuture, err error) {
+ r, err := cl.inner.Delete(ctx, resourceGroupName, diskName)
+ return r, wrapAzureError(err)
+}
+
var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`)
type azureRateLimitError struct {
}
type azureInstanceSet struct {
- azconfig azureInstanceSetConfig
- vmClient virtualMachinesClientWrapper
- netClient interfacesClientWrapper
- blobcont containerWrapper
- azureEnv azure.Environment
- interfaces map[string]network.Interface
- dispatcherID string
- namePrefix string
- ctx context.Context
- stopFunc context.CancelFunc
- stopWg sync.WaitGroup
- deleteNIC chan string
- deleteBlob chan storage.Blob
- logger logrus.FieldLogger
+ azconfig azureInstanceSetConfig
+ vmClient virtualMachinesClientWrapper
+ netClient interfacesClientWrapper
+ disksClient disksClientWrapper
+ imageResourceGroup string
+ blobcont containerWrapper
+ azureEnv azure.Environment
+ interfaces map[string]network.Interface
+ dispatcherID string
+ namePrefix string
+ ctx context.Context
+ stopFunc context.CancelFunc
+ stopWg sync.WaitGroup
+ deleteNIC chan string
+ deleteBlob chan storage.Blob
+ deleteDisk chan compute.Disk
+ logger logrus.FieldLogger
}
func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, _ cloud.SharedResourceTags, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
az.azconfig = azcfg
vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID)
netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID)
+ disksClient := compute.NewDisksClient(az.azconfig.SubscriptionID)
storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID)
az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnvironment)
vmClient.Authorizer = authorizer
netClient.Authorizer = authorizer
+ disksClient.Authorizer = authorizer
storageAcctClient.Authorizer = authorizer
az.vmClient = &virtualMachinesClientImpl{vmClient}
az.netClient = &interfacesClientImpl{netClient}
+ az.disksClient = &disksClientImpl{disksClient}
- result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
- if err != nil {
- az.logger.WithError(err).Warn("Couldn't get account keys")
- return err
+ az.imageResourceGroup = az.azconfig.ImageResourceGroup
+ if az.imageResourceGroup == "" {
+ az.imageResourceGroup = az.azconfig.ResourceGroup
}
- key1 := *(*result.Keys)[0].Value
- client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
- if err != nil {
- az.logger.WithError(err).Warn("Couldn't make client")
- return err
- }
+ var client storage.Client
+ if az.azconfig.StorageAccount != "" && az.azconfig.BlobContainer != "" {
+ result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
+ if err != nil {
+ az.logger.WithError(err).Warn("Couldn't get account keys")
+ return err
+ }
- blobsvc := client.GetBlobService()
- az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
+ key1 := *(*result.Keys)[0].Value
+ client, err = storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
+ if err != nil {
+ az.logger.WithError(err).Warn("Couldn't make client")
+ return err
+ }
+
+ blobsvc := client.GetBlobService()
+ az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
+ } else if az.azconfig.StorageAccount != "" || az.azconfig.BlobContainer != "" {
+ az.logger.Error("Invalid configuration: StorageAccount and BlobContainer must both be empty or both be set")
+ }
az.dispatcherID = dispatcherID
az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID)
tk.Stop()
return
case <-tk.C:
- az.manageBlobs()
+ if az.blobcont != nil {
+ az.manageBlobs()
+ }
+ az.manageDisks()
}
}
}()
az.deleteNIC = make(chan string)
az.deleteBlob = make(chan storage.Blob)
+ az.deleteDisk = make(chan compute.Disk)
for i := 0; i < 4; i++ {
go func() {
- for {
- nicname, ok := <-az.deleteNIC
- if !ok {
- return
- }
+ for nicname := range az.deleteNIC {
_, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, nicname)
if delerr != nil {
az.logger.WithError(delerr).Warnf("Error deleting %v", nicname)
}
}()
go func() {
- for {
- blob, ok := <-az.deleteBlob
- if !ok {
- return
- }
+ for blob := range az.deleteBlob {
err := blob.Delete(nil)
if err != nil {
az.logger.WithError(err).Warnf("Error deleting %v", blob.Name)
}
}
}()
+ go func() {
+ for disk := range az.deleteDisk {
+ _, err := az.disksClient.delete(az.ctx, az.imageResourceGroup, *disk.Name)
+ if err != nil {
+ az.logger.WithError(err).Warnf("Error deleting disk %+v", *disk.Name)
+ } else {
+ az.logger.Printf("Deleted disk %v", *disk.Name)
+ }
+ }
+ }()
}
return nil
}
+func (az *azureInstanceSet) cleanupNic(nic network.Interface) {
+ _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
+ if delerr != nil {
+ az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
+ }
+}
+
func (az *azureInstanceSet) Create(
instanceType arvados.InstanceType,
imageID cloud.ImageID,
Tags: tags,
InterfacePropertiesFormat: &network.InterfacePropertiesFormat{
IPConfigurations: &[]network.InterfaceIPConfiguration{
- network.InterfaceIPConfiguration{
+ {
Name: to.StringPtr("ip1"),
InterfaceIPConfigurationPropertiesFormat: &network.InterfaceIPConfigurationPropertiesFormat{
Subnet: &network.Subnet{
return nil, wrapAzureError(err)
}
- blobname := fmt.Sprintf("%s-os.vhd", name)
- instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
- az.azconfig.StorageAccount,
- az.azureEnv.StorageEndpointSuffix,
- az.azconfig.BlobContainer,
- blobname)
-
+ var blobname string
customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))
+ var storageProfile *compute.StorageProfile
+
+ re := regexp.MustCompile(`^http(s?)://`)
+ if re.MatchString(string(imageID)) {
+ if az.blobcont == nil {
+ az.cleanupNic(nic)
+ return nil, wrapAzureError(errors.New("Invalid configuration: can't configure unmanaged image URL without StorageAccount and BlobContainer"))
+ }
+ blobname = fmt.Sprintf("%s-os.vhd", name)
+ instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
+ az.azconfig.StorageAccount,
+ az.azureEnv.StorageEndpointSuffix,
+ az.azconfig.BlobContainer,
+ blobname)
+ az.logger.Warn("using deprecated unmanaged image, see https://doc.arvados.org/ to migrate to managed disks")
+ storageProfile = &compute.StorageProfile{
+ OsDisk: &compute.OSDisk{
+ OsType: compute.Linux,
+ Name: to.StringPtr(name + "-os"),
+ CreateOption: compute.DiskCreateOptionTypesFromImage,
+ Image: &compute.VirtualHardDisk{
+ URI: to.StringPtr(string(imageID)),
+ },
+ Vhd: &compute.VirtualHardDisk{
+ URI: &instanceVhd,
+ },
+ },
+ }
+ } else {
+ id := to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/images/" + string(imageID))
+ if az.azconfig.SharedImageGalleryName != "" && az.azconfig.SharedImageGalleryImageVersion != "" {
+ id = to.StringPtr("/subscriptions/" + az.azconfig.SubscriptionID + "/resourceGroups/" + az.imageResourceGroup + "/providers/Microsoft.Compute/galleries/" + az.azconfig.SharedImageGalleryName + "/images/" + string(imageID) + "/versions/" + az.azconfig.SharedImageGalleryImageVersion)
+ } else if az.azconfig.SharedImageGalleryName != "" || az.azconfig.SharedImageGalleryImageVersion != "" {
+ az.cleanupNic(nic)
+ return nil, wrapAzureError(errors.New("Invalid configuration: SharedImageGalleryName and SharedImageGalleryImageVersion must both be set or both be empty"))
+ }
+ storageProfile = &compute.StorageProfile{
+ ImageReference: &compute.ImageReference{
+ ID: id,
+ },
+ OsDisk: &compute.OSDisk{
+ OsType: compute.Linux,
+ Name: to.StringPtr(name + "-os"),
+ CreateOption: compute.DiskCreateOptionTypesFromImage,
+ },
+ }
+ }
vmParameters := compute.VirtualMachine{
Location: &az.azconfig.Location,
HardwareProfile: &compute.HardwareProfile{
VMSize: compute.VirtualMachineSizeTypes(instanceType.ProviderType),
},
- StorageProfile: &compute.StorageProfile{
- OsDisk: &compute.OSDisk{
- OsType: compute.Linux,
- Name: to.StringPtr(name + "-os"),
- CreateOption: compute.FromImage,
- Image: &compute.VirtualHardDisk{
- URI: to.StringPtr(string(imageID)),
- },
- Vhd: &compute.VirtualHardDisk{
- URI: &instanceVhd,
- },
- },
- },
+ StorageProfile: storageProfile,
NetworkProfile: &compute.NetworkProfile{
NetworkInterfaces: &[]compute.NetworkInterfaceReference{
- compute.NetworkInterfaceReference{
+ {
ID: nic.ID,
NetworkInterfaceReferenceProperties: &compute.NetworkInterfaceReferenceProperties{
Primary: to.BoolPtr(true),
vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
if err != nil {
- _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
- if delerr != nil {
- az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+ // Do some cleanup. Otherwise, an unbounded number of new unused nics and
+ // blobs can pile up during times when VMs can't be created and the
+ // dispatcher keeps retrying, because the garbage collection in manageBlobs
+ // and manageNics is only triggered periodically. This is most important
+ // for nics, because those are subject to a quota.
+ az.cleanupNic(nic)
+
+ if blobname != "" {
+ _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
+ if delerr != nil {
+ az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+ }
}
- _, delerr = az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
- if delerr != nil {
- az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
- }
+ // Leave cleaning up of managed disks to the garbage collection in manageDisks()
return nil, wrapAzureError(err)
}
return instances, nil
}
-// ManageNics returns a list of Azure network interface resources.
+// manageNics returns a list of Azure network interface resources.
// Also performs garbage collection of NICs which have "namePrefix",
// are not associated with a virtual machine and have a "created-at"
// time more than DeleteDanglingResourcesAfter (to prevent racing and
return interfaces, nil
}
-// ManageBlobs garbage collects blobs (VM disk images) in the
+// manageBlobs garbage collects blobs (VM disk images) in the
// configured storage account container. It will delete blobs which
// have "namePrefix", are "available" (which means they are not
// leased to a VM) and haven't been modified for
}
}
+// manageDisks garbage collects managed compute disks (VM disk images) in the
+// configured resource group. It will delete disks which have "namePrefix",
+// are "unattached" (which means they are not leased to a VM) and were created
+// more than DeleteDanglingResourcesAfter ago. (Azure provides no modification
+// timestamp on managed disks; there is only a creation timestamp.)
+func (az *azureInstanceSet) manageDisks() {
+
+ re := regexp.MustCompile(`^` + regexp.QuoteMeta(az.namePrefix) + `.*-os$`)
+ threshold := time.Now().Add(-az.azconfig.DeleteDanglingResourcesAfter.Duration())
+
+ response, err := az.disksClient.listByResourceGroup(az.ctx, az.imageResourceGroup)
+ if err != nil {
+ az.logger.WithError(err).Warn("Error listing disks")
+ return
+ }
+
+ for ; response.NotDone(); err = response.Next() {
+ if err != nil {
+ az.logger.WithError(err).Warn("Error getting next page of disks")
+ return
+ }
+ for _, d := range response.Values() {
+ if d.DiskProperties.DiskState == compute.Unattached &&
+ d.Name != nil && re.MatchString(*d.Name) &&
+ d.DiskProperties.TimeCreated.ToTime().Before(threshold) {
+
+ az.logger.Printf("Disk %v is unlocked and was created at %+v, will delete", *d.Name, d.DiskProperties.TimeCreated.ToTime())
+ az.deleteDisk <- d
+ }
+ }
+ }
+}
+
func (az *azureInstanceSet) Stop() {
az.stopFunc()
az.stopWg.Wait()
close(az.deleteNIC)
close(az.deleteBlob)
+ close(az.deleteDisk)
}
type azureInstance struct {
"git.arvados.org/arvados.git/lib/dispatchcloud/test"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/config"
- "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
+ "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
"github.com/Azure/azure-sdk-for-go/storage"
"github.com/Azure/go-autorest/autorest"
func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error) {
cluster := arvados.Cluster{
InstanceTypes: arvados.InstanceTypeMap(map[string]arvados.InstanceType{
- "tiny": arvados.InstanceType{
+ "tiny": {
Name: "tiny",
ProviderType: "Standard_D1_v2",
VCPUs: 1,
logger: logrus.StandardLogger(),
deleteNIC: make(chan string),
deleteBlob: make(chan storage.Blob),
+ deleteDisk: make(chan compute.Disk),
}
ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
ap.vmClient = &VirtualMachinesClientStub{}
DetailedError: autorest.DetailedError{
Response: &http.Response{
StatusCode: 429,
- Header: map[string][]string{"Retry-After": []string{"123"}},
+ Header: map[string][]string{"Retry-After": {"123"}},
},
},
ServiceError: &azure.ServiceError{},
var ok bool
if keyname, ok = instanceSet.keys[md5keyFingerprint]; !ok {
keyout, err := instanceSet.client.DescribeKeyPairs(&ec2.DescribeKeyPairsInput{
- Filters: []*ec2.Filter{&ec2.Filter{
+ Filters: []*ec2.Filter{{
Name: aws.String("fingerprint"),
Values: []*string{&md5keyFingerprint, &sha1keyFingerprint},
}},
KeyName: &keyname,
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
- &ec2.InstanceNetworkInterfaceSpecification{
+ {
AssociatePublicIpAddress: aws.Bool(false),
DeleteOnTermination: aws.Bool(true),
DeviceIndex: aws.Int64(0),
DisableApiTermination: aws.Bool(false),
InstanceInitiatedShutdownBehavior: aws.String("terminate"),
TagSpecifications: []*ec2.TagSpecification{
- &ec2.TagSpecification{
+ {
ResourceType: aws.String("instance"),
Tags: ec2tags,
}},
}
if instanceType.AddedScratch > 0 {
- rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{&ec2.BlockDeviceMapping{
+ rii.BlockDeviceMappings = []*ec2.BlockDeviceMapping{{
DeviceName: aws.String("/dev/xvdt"),
Ebs: &ec2.EbsBlockDevice{
DeleteOnTermination: aws.Bool(true),
}
func (e *ec2stub) RunInstances(input *ec2.RunInstancesInput) (*ec2.Reservation, error) {
- return &ec2.Reservation{Instances: []*ec2.Instance{&ec2.Instance{
+ return &ec2.Reservation{Instances: []*ec2.Instance{{
InstanceId: aws.String("i-123"),
Tags: input.TagSpecifications[0].Tags,
}}}, nil
func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error) {
cluster := arvados.Cluster{
InstanceTypes: arvados.InstanceTypeMap(map[string]arvados.InstanceType{
- "tiny": arvados.InstanceType{
+ "tiny": {
Name: "tiny",
ProviderType: "t2.micro",
VCPUs: 1,
Price: .02,
Preemptible: false,
},
- "tiny-with-extra-scratch": arvados.InstanceType{
+ "tiny-with-extra-scratch": {
Name: "tiny",
ProviderType: "t2.micro",
VCPUs: 1,
Preemptible: false,
AddedScratch: 20000000000,
},
- "tiny-preemptible": arvados.InstanceType{
+ "tiny-preemptible": {
Name: "tiny",
ProviderType: "t2.micro",
VCPUs: 1,
ProviderAppID: ""
ProviderAppSecret: ""
+ Test:
+ # Authenticate users listed here in the config file. This
+ # feature is intended to be used in test environments, and
+ # should not be used in production.
+ Enable: false
+ Users:
+ SAMPLE:
+ Email: alice@example.com
+ Password: xyzzy
+
# The cluster ID to delegate the user database. When set,
# logins on this cluster will be redirected to the login cluster
# (login cluster must appear in RemoteClusters with Proxy: true)
# remain valid before it needs to be revalidated.
RemoteTokenRefresh: 5m
+ # How long a client token created from a login flow will be valid without
+ # asking the user to re-login. Example values: 60m, 8h.
+ # Default value zero means tokens don't have expiration.
+ TokenLifetime: 0s
+
Git:
# Path to git or gitolite-shell executable. Each authenticated
# request will execute this program with the single argument "http-backend"
TimeoutShutdown: 10s
# Worker VM image ID.
+ # (aws) AMI identifier
+ # (azure) managed disks: the name of the managed disk image
+ # (azure) shared image gallery: the name of the image definition. Also
+ # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
+ # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
+ # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
ImageID: ""
# An executable file (located on the dispatcher host) to be
Network: ""
Subnet: ""
- # (azure) Where to store the VM VHD blobs
+ # (azure) managed disks: The resource group where the managed disk
+ # image can be found (if different from ResourceGroup).
+ ImageResourceGroup: ""
+
+ # (azure) shared image gallery: the name of the gallery
+ SharedImageGalleryName: ""
+ # (azure) shared image gallery: the version of the image definition
+ SharedImageGalleryImageVersion: ""
+
+ # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
StorageAccount: ""
BlobContainer: ""
# a link to the multi-site search page on a "home" Workbench site.
#
# Example:
- # https://workbench.qr1hi.arvadosapi.com/collections/multisite
+ # https://workbench.zzzzz.arvadosapi.com/collections/multisite
MultiSiteSearch: ""
# Should workbench allow management of local git repositories? Set to false if
"Login.SSO.Enable": true,
"Login.SSO.ProviderAppID": false,
"Login.SSO.ProviderAppSecret": false,
+ "Login.Test": true,
+ "Login.Test.Enable": true,
+ "Login.Test.Users": false,
+ "Login.TokenLifetime": false,
"Mail": true,
"Mail.EmailFrom": false,
"Mail.IssueReporterEmailFrom": false,
ProviderAppID: ""
ProviderAppSecret: ""
+ Test:
+ # Authenticate users listed here in the config file. This
+ # feature is intended to be used in test environments, and
+ # should not be used in production.
+ Enable: false
+ Users:
+ SAMPLE:
+ Email: alice@example.com
+ Password: xyzzy
+
# The cluster ID to delegate the user database. When set,
# logins on this cluster will be redirected to the login cluster
# (login cluster must appear in RemoteClusters with Proxy: true)
# remain valid before it needs to be revalidated.
RemoteTokenRefresh: 5m
+ # How long a client token created from a login flow will be valid without
+ # asking the user to re-login. Example values: 60m, 8h.
+ # Default value zero means tokens don't have expiration.
+ TokenLifetime: 0s
+
Git:
# Path to git or gitolite-shell executable. Each authenticated
# request will execute this program with the single argument "http-backend"
TimeoutShutdown: 10s
# Worker VM image ID.
+ # (aws) AMI identifier
+ # (azure) managed disks: the name of the managed disk image
+ # (azure) shared image gallery: the name of the image definition. Also
+ # see the SharedImageGalleryName and SharedImageGalleryImageVersion fields.
+ # (azure) unmanaged disks (deprecated): the complete URI of the VHD, e.g.
+ # https://xxxxx.blob.core.windows.net/system/Microsoft.Compute/Images/images/xxxxx.vhd
ImageID: ""
# An executable file (located on the dispatcher host) to be
Network: ""
Subnet: ""
- # (azure) Where to store the VM VHD blobs
+ # (azure) managed disks: The resource group where the managed disk
+ # image can be found (if different from ResourceGroup).
+ ImageResourceGroup: ""
+
+ # (azure) shared image gallery: the name of the gallery
+ SharedImageGalleryName: ""
+ # (azure) shared image gallery: the version of the image definition
+ SharedImageGalleryImageVersion: ""
+
+ # (azure) unmanaged disks (deprecated): Where to store the VM VHD blobs
StorageAccount: ""
BlobContainer: ""
# a link to the multi-site search page on a "home" Workbench site.
#
# Example:
- # https://workbench.qr1hi.arvadosapi.com/collections/multisite
+ # https://workbench.zzzzz.arvadosapi.com/collections/multisite
MultiSiteSearch: ""
# Should workbench allow management of local git repositories? Set to false if
ClusterID: "aaaaa",
SystemRootToken: arvadostest.SystemRootToken,
RemoteClusters: map[string]arvados.RemoteCluster{
- "aaaaa": arvados.RemoteCluster{
+ "aaaaa": {
Host: os.Getenv("ARVADOS_API_HOST"),
},
},
"api_clients/" + arvadostest.TrustedWorkbenchAPIClientUUID: nil,
"api_client_authorizations/" + arvadostest.AdminTokenUUID: nil,
"authorized_keys/" + arvadostest.AdminAuthorizedKeysUUID: nil,
- "collections/" + arvadostest.CollectionWithUniqueWordsUUID: map[string]bool{"href": true},
+ "collections/" + arvadostest.CollectionWithUniqueWordsUUID: {"href": true},
"containers/" + arvadostest.RunningContainerUUID: nil,
"container_requests/" + arvadostest.QueuedContainerRequestUUID: nil,
"groups/" + arvadostest.AProjectUUID: nil,
"logs/" + arvadostest.CrunchstatForRunningJobLogUUID: nil,
"nodes/" + arvadostest.IdleNodeUUID: nil,
"repositories/" + arvadostest.ArvadosRepoUUID: nil,
- "users/" + arvadostest.ActiveUserUUID: map[string]bool{"href": true},
+ "users/" + arvadostest.ActiveUserUUID: {"href": true},
"virtual_machines/" + arvadostest.TestVMUUID: nil,
"workflows/" + arvadostest.WorkflowWithDefinitionYAMLUUID: nil,
}
wantSSO := cluster.Login.SSO.Enable
wantPAM := cluster.Login.PAM.Enable
wantLDAP := cluster.Login.LDAP.Enable
+ wantTest := cluster.Login.Test.Enable
switch {
- case wantGoogle && !wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
+ case 1 != countTrue(wantGoogle, wantOpenIDConnect, wantSSO, wantPAM, wantLDAP, wantTest):
+ return errorLoginController{
+ error: errors.New("configuration problem: exactly one of Login.Google, Login.OpenIDConnect, Login.SSO, Login.PAM, Login.LDAP, and Login.Test must be enabled"),
+ }
+ case wantGoogle:
return &oidcLoginController{
Cluster: cluster,
RailsProxy: railsProxy,
EmailClaim: "email",
EmailVerifiedClaim: "email_verified",
}
- case !wantGoogle && wantOpenIDConnect && !wantSSO && !wantPAM && !wantLDAP:
+ case wantOpenIDConnect:
return &oidcLoginController{
Cluster: cluster,
RailsProxy: railsProxy,
EmailVerifiedClaim: cluster.Login.OpenIDConnect.EmailVerifiedClaim,
UsernameClaim: cluster.Login.OpenIDConnect.UsernameClaim,
}
- case !wantGoogle && !wantOpenIDConnect && wantSSO && !wantPAM && !wantLDAP:
+ case wantSSO:
return &ssoLoginController{railsProxy}
- case !wantGoogle && !wantOpenIDConnect && !wantSSO && wantPAM && !wantLDAP:
+ case wantPAM:
return &pamLoginController{Cluster: cluster, RailsProxy: railsProxy}
- case !wantGoogle && !wantOpenIDConnect && !wantSSO && !wantPAM && wantLDAP:
+ case wantLDAP:
return &ldapLoginController{Cluster: cluster, RailsProxy: railsProxy}
+ case wantTest:
+ return &testLoginController{Cluster: cluster, RailsProxy: railsProxy}
default:
return errorLoginController{
- error: errors.New("configuration problem: exactly one of Login.Google, Login.OpenIDConnect, Login.SSO, Login.PAM, and Login.LDAP must be enabled"),
+ error: errors.New("BUG: missing case in login controller setup switch"),
+ }
+ }
+}
+
+func countTrue(vals ...bool) int {
+ n := 0
+ for _, val := range vals {
+ if val {
+ n++
}
}
+ return n
}
// Login and Logout are passed through to the wrapped railsProxy;
return []*godap.LDAPSimpleSearchResultEntry{}
}
return []*godap.LDAPSimpleSearchResultEntry{
- &godap.LDAPSimpleSearchResultEntry{
+ {
DN: "cn=" + req.FilterValue + "," + req.BaseDN,
Attrs: map[string]interface{}{
"SN": req.FilterValue,
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+ "bytes"
+ "context"
+ "fmt"
+ "html/template"
+
+ "git.arvados.org/arvados.git/lib/controller/rpc"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/sirupsen/logrus"
+)
+
+type testLoginController struct {
+ Cluster *arvados.Cluster
+ RailsProxy *railsProxy
+}
+
+func (ctrl *testLoginController) Logout(ctx context.Context, opts arvados.LogoutOptions) (arvados.LogoutResponse, error) {
+ return noopLogout(ctrl.Cluster, opts)
+}
+
+func (ctrl *testLoginController) Login(ctx context.Context, opts arvados.LoginOptions) (arvados.LoginResponse, error) {
+ tmpl, err := template.New("form").Parse(loginform)
+ if err != nil {
+ return arvados.LoginResponse{}, err
+ }
+ var buf bytes.Buffer
+ err = tmpl.Execute(&buf, opts)
+ if err != nil {
+ return arvados.LoginResponse{}, err
+ }
+ return arvados.LoginResponse{HTML: buf}, nil
+}
+
+func (ctrl *testLoginController) UserAuthenticate(ctx context.Context, opts arvados.UserAuthenticateOptions) (arvados.APIClientAuthorization, error) {
+ for username, user := range ctrl.Cluster.Login.Test.Users {
+ if (opts.Username == username || opts.Username == user.Email) && opts.Password == user.Password {
+ ctxlog.FromContext(ctx).WithFields(logrus.Fields{
+ "username": username,
+ "email": user.Email,
+ }).Debug("test authentication succeeded")
+ return createAPIClientAuthorization(ctx, ctrl.RailsProxy, ctrl.Cluster.SystemRootToken, rpc.UserSessionAuthInfo{
+ Username: username,
+ Email: user.Email,
+ })
+ }
+ }
+ return arvados.APIClientAuthorization{}, fmt.Errorf("authentication failed for user %q with password len=%d", opts.Username, len(opts.Password))
+}
+
+const loginform = `
+<!doctype html>
+<html>
+ <head><title>Arvados test login</title>
+ <script>
+ async function authenticate(event) {
+ event.preventDefault()
+ document.getElementById('error').innerHTML = ''
+ const resp = await fetch('/arvados/v1/users/authenticate', {
+ method: 'POST',
+ mode: 'same-origin',
+ headers: {'Content-Type': 'application/json'},
+ body: JSON.stringify({
+ username: document.getElementById('username').value,
+ password: document.getElementById('password').value,
+ }),
+ })
+ if (!resp.ok) {
+ document.getElementById('error').innerHTML = '<p>Authentication failed.</p><p>The "test login" users are defined in Clusters.[ClusterID].Login.Test.Users section of config.yml</p><p>If you are using arvbox, use "arvbox adduser" to add users.</p>'
+ return
+ }
+ var redir = document.getElementById('return_to').value
+ if (redir.indexOf('?') > 0) {
+ redir += '&'
+ } else {
+ redir += '?'
+ }
+ const respj = await resp.json()
+ document.location = redir + "api_token=" + respj.api_token
+ }
+ </script>
+ </head>
+ <body>
+ <h3>Arvados test login</h3>
+ <form method="POST">
+ <input id="return_to" type="hidden" name="return_to" value="{{.ReturnTo}}">
+ username <input id="username" type="text" name="username" size=16>
+ password <input id="password" type="password" name="password" size=16>
+ <input type="submit" value="Log in">
+ <br>
+ <p id="error"></p>
+ </form>
+ </body>
+ <script>
+ document.getElementsByTagName('form')[0].onsubmit = authenticate
+ </script>
+</html>
+`
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package localdb
+
+import (
+ "context"
+
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/lib/controller/rpc"
+ "git.arvados.org/arvados.git/lib/ctrlctx"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/jmoiron/sqlx"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&TestUserSuite{})
+
+type TestUserSuite struct {
+ cluster *arvados.Cluster
+ ctrl *testLoginController
+ railsSpy *arvadostest.Proxy
+ db *sqlx.DB
+
+ // transaction context
+ ctx context.Context
+ rollback func() error
+}
+
+func (s *TestUserSuite) SetUpSuite(c *check.C) {
+ cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+ c.Assert(err, check.IsNil)
+ s.cluster, err = cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ s.cluster.Login.Test.Enable = true
+ s.cluster.Login.Test.Users = map[string]arvados.TestUser{
+ "valid": {Email: "valid@example.com", Password: "v@l1d"},
+ }
+ s.railsSpy = arvadostest.NewProxy(c, s.cluster.Services.RailsAPI)
+ s.ctrl = &testLoginController{
+ Cluster: s.cluster,
+ RailsProxy: rpc.NewConn(s.cluster.ClusterID, s.railsSpy.URL, true, rpc.PassthroughTokenProvider),
+ }
+ s.db = arvadostest.DB(c, s.cluster)
+}
+
+func (s *TestUserSuite) SetUpTest(c *check.C) {
+ tx, err := s.db.Beginx()
+ c.Assert(err, check.IsNil)
+ s.ctx = ctrlctx.NewWithTransaction(context.Background(), tx)
+ s.rollback = tx.Rollback
+}
+
+func (s *TestUserSuite) TearDownTest(c *check.C) {
+ if s.rollback != nil {
+ s.rollback()
+ }
+}
+
+func (s *TestUserSuite) TestLogin(c *check.C) {
+ for _, trial := range []struct {
+ success bool
+ username string
+ password string
+ }{
+ {false, "foo", "bar"},
+ {false, "", ""},
+ {false, "valid", ""},
+ {false, "", "v@l1d"},
+ {true, "valid", "v@l1d"},
+ {true, "valid@example.com", "v@l1d"},
+ } {
+ c.Logf("=== %#v", trial)
+ resp, err := s.ctrl.UserAuthenticate(s.ctx, arvados.UserAuthenticateOptions{
+ Username: trial.username,
+ Password: trial.password,
+ })
+ if trial.success {
+ c.Check(err, check.IsNil)
+ c.Check(resp.APIToken, check.Not(check.Equals), "")
+ c.Check(resp.UUID, check.Matches, `zzzzz-gj3su-.*`)
+ c.Check(resp.Scopes, check.DeepEquals, []string{"all"})
+
+ authinfo := getCallbackAuthInfo(c, s.railsSpy)
+ c.Check(authinfo.Email, check.Equals, "valid@example.com")
+ c.Check(authinfo.AlternateEmails, check.DeepEquals, []string(nil))
+ } else {
+ c.Check(err, check.ErrorMatches, `authentication failed.*`)
+ }
+ }
+}
+
+func (s *TestUserSuite) TestLoginForm(c *check.C) {
+ resp, err := s.ctrl.Login(s.ctx, arvados.LoginOptions{
+ ReturnTo: "https://localhost:12345/example",
+ })
+ c.Check(err, check.IsNil)
+ c.Check(resp.HTML.String(), check.Matches, `(?ms).*<form method="POST".*`)
+ c.Check(resp.HTML.String(), check.Matches, `(?ms).*<input id="return_to" type="hidden" name="return_to" value="https://localhost:12345/example">.*`)
+}
type TokenProvider func(context.Context) ([]string, error)
func PassthroughTokenProvider(ctx context.Context) ([]string, error) {
- if incoming, ok := auth.FromContext(ctx); !ok {
+ incoming, ok := auth.FromContext(ctx)
+ if !ok {
return nil, errors.New("no token provided")
- } else {
- return incoming.Tokens, nil
}
+ return incoming.Tokens, nil
}
type Conn struct {
u.User = nil
u.Host = ""
return u.String()
- } else {
- return location
}
+ return location
}
func (conn *Conn) CollectionCreate(ctx context.Context, options arvados.CreateOptions) (arvados.Collection, error) {
return err
}
return w.Close()
- } else {
- // Dispatched via crunch-dispatch-slurm. Look up
- // apiserver's node record corresponding to
- // $SLURMD_NODENAME.
- hostname := os.Getenv("SLURMD_NODENAME")
- if hostname == "" {
- hostname, _ = os.Hostname()
- }
- _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) {
- // The "info" field has admin-only info when
- // obtained with a privileged token, and
- // should not be logged.
- node, ok := resp.(map[string]interface{})
- if ok {
- delete(node, "info")
- }
- })
- return err
}
+ // Dispatched via crunch-dispatch-slurm. Look up
+ // apiserver's node record corresponding to
+ // $SLURMD_NODENAME.
+ hostname := os.Getenv("SLURMD_NODENAME")
+ if hostname == "" {
+ hostname, _ = os.Hostname()
+ }
+ _, err := runner.logAPIResponse("node", "nodes", map[string]interface{}{"filters": [][]string{{"hostname", "=", hostname}}}, func(resp interface{}) {
+ // The "info" field has admin-only info when
+ // obtained with a privileged token, and
+ // should not be logged.
+ node, ok := resp.(map[string]interface{})
+ if ok {
+ delete(node, "info")
+ }
+ })
+ return err
}
func (runner *ContainerRunner) logAPIResponse(label, path string, params map[string]interface{}, munge func(interface{})) (logged bool, err error) {
return runner.token, nil
}
-// UpdateContainerComplete updates the container record state on API
+// UpdateContainerFinal updates the container record state on API
// server to "Complete" or "Cancelled"
func (runner *ContainerRunner) UpdateContainerFinal() error {
update := arvadosclient.Dict{}
"golang.org/x/crypto/ssh"
)
-// Map of available cloud drivers.
+// Drivers is a map of available cloud drivers.
// Clusters.*.Containers.CloudVMs.Driver configuration values
// correspond to keys in this map.
var Drivers = map[string]cloud.Driver{
func boolLabelValue(v bool) string {
if v {
return "1"
- } else {
- return "0"
}
+ return "0"
}
test.InstanceType(2): 0,
},
running: map[string]time.Time{
- test.ContainerUUID(2): time.Time{},
+ test.ContainerUUID(2): {},
},
}
queue := test.Queue{
}
var allowContainerUpdate = map[arvados.ContainerState]map[arvados.ContainerState]bool{
- arvados.ContainerStateQueued: map[arvados.ContainerState]bool{
+ arvados.ContainerStateQueued: {
arvados.ContainerStateQueued: true,
arvados.ContainerStateLocked: true,
arvados.ContainerStateCancelled: true,
},
- arvados.ContainerStateLocked: map[arvados.ContainerState]bool{
+ arvados.ContainerStateLocked: {
arvados.ContainerStateQueued: true,
arvados.ContainerStateLocked: true,
arvados.ContainerStateRunning: true,
arvados.ContainerStateCancelled: true,
},
- arvados.ContainerStateRunning: map[arvados.ContainerState]bool{
+ arvados.ContainerStateRunning: {
arvados.ContainerStateRunning: true,
arvados.ContainerStateCancelled: true,
arvados.ContainerStateComplete: true,
}
if sis.allowCreateCall.After(time.Now()) {
return nil, RateLimitError{sis.allowCreateCall}
- } else {
- sis.allowCreateCall = time.Now().Add(sis.driver.MinTimeBetweenCreateCalls)
}
-
+ sis.allowCreateCall = time.Now().Add(sis.driver.MinTimeBetweenCreateCalls)
ak := sis.driver.AuthorizedKeys
if authKey != nil {
ak = append([]ssh.PublicKey{authKey}, ak...)
defer sis.mtx.RUnlock()
if sis.allowInstancesCall.After(time.Now()) {
return nil, RateLimitError{sis.allowInstancesCall}
- } else {
- sis.allowInstancesCall = time.Now().Add(sis.driver.MinTimeBetweenInstancesCalls)
}
+ sis.allowInstancesCall = time.Now().Add(sis.driver.MinTimeBetweenInstancesCalls)
var r []cloud.Instance
for _, ss := range sis.servers {
r = append(r, ss.Instance())
if running {
fmt.Fprintf(stderr, "%s: container is running\n", uuid)
return 1
- } else {
- fmt.Fprintf(stderr, "%s: container is not running\n", uuid)
- return 0
}
+ fmt.Fprintf(stderr, "%s: container is not running\n", uuid)
+ return 0
}
if command == "true" {
return 0
return regexp.MustCompile(`\S+`).ReplaceAllStringFunc(manifest, func(tok string) string {
if mBlkRe.MatchString(tok) {
return SignLocator(mPermHintRe.ReplaceAllString(tok, ""), apiToken, expiry, ttl, permissionSecret)
- } else {
- return tok
}
+ return tok
})
}
var DefaultConfigFile = func() string {
if path := os.Getenv("ARVADOS_CONFIG"); path != "" {
return path
- } else {
- return "/etc/arvados/config.yml"
}
+ return "/etc/arvados/config.yml"
}()
type Config struct {
ProviderAppID string
ProviderAppSecret string
}
+ Test struct {
+ Enable bool
+ Users map[string]TestUser
+ }
LoginCluster string
RemoteTokenRefresh Duration
+ TokenLifetime Duration
}
Mail struct {
MailchimpAPIKey string
ExternalURL URL
}
+type TestUser struct {
+ Email string
+ Password string
+}
+
// URL is a url.URL that is also usable as a JSON key/value.
type URL url.URL
seg.Truncate(len(cando))
fn.memsize += int64(len(cando))
fn.segments[cur] = seg
- cur++
- prev++
}
}
// situation might be rare anyway)
segIdx, pos = 0, 0
}
- for next := int64(0); segIdx < len(segments); segIdx++ {
+ for ; segIdx < len(segments); segIdx++ {
seg := segments[segIdx]
- next = pos + int64(seg.Len())
+ next := pos + int64(seg.Len())
if next <= offset || seg.Len() == 0 {
pos = next
continue
// Ensure collection was flushed by Sync
var latest Collection
err = s.client.RequestAndDecode(&latest, "GET", "arvados/v1/collections/"+oob.UUID, nil, nil)
+ c.Check(err, check.IsNil)
c.Check(latest.ManifestText, check.Matches, `.*:test.txt.*\n`)
// Delete test.txt behind s.fs's back by updating the
return nil
}
-// Index returns an unsorted list of blocks at the given mount point.
+// IndexMount returns an unsorted list of blocks at the given mount point.
func (s *KeepService) IndexMount(ctx context.Context, c *Client, mountUUID string, prefix string) ([]KeepServiceIndexEntry, error) {
return s.index(ctx, c, s.url("mounts/"+mountUUID+"/blocks?prefix="+prefix))
}
e.HttpStatusCode,
e.HttpStatusMessage,
e.ServerAddress)
- } else {
- return fmt.Sprintf("arvados API server error: %d: %s returned by %s",
- e.HttpStatusCode,
- e.HttpStatusMessage,
- e.ServerAddress)
}
+ return fmt.Sprintf("arvados API server error: %d: %s returned by %s",
+ e.HttpStatusCode,
+ e.HttpStatusMessage,
+ e.ServerAddress)
}
// StringBool tests whether s is suggestive of true. It returns true
value, found = c.DiscoveryDoc[parameter]
if found {
return value, nil
- } else {
- return value, ErrInvalidArgument
}
+ return value, ErrInvalidArgument
}
func (ac *ArvadosClient) httpClient() *http.Client {
defer ks.listener.Close()
arv, err := arvadosclient.MakeArvadosClient()
+ c.Check(err, IsNil)
kc, _ := MakeKeepClient(arv)
arv.ApiToken = "abc123"
kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
func (rs RootSorter) getWeight(hash string, uuid string) string {
if len(uuid) == 27 {
return Md5String(hash + uuid[12:])
- } else {
- // Only useful for testing, a set of one service root, etc.
- return Md5String(hash + uuid)
}
+ // Only useful for testing, a set of one service root, etc.
+ return Md5String(hash + uuid)
}
func (rs RootSorter) GetSortedRoots() []string {
sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
// The next server to try contacting
- next_server := 0
+ nextServer := 0
// The number of active writers
active := 0
for retriesRemaining > 0 {
retriesRemaining -= 1
- next_server = 0
+ nextServer = 0
retryServers = []string{}
for replicasTodo > 0 {
for active*replicasPerThread < replicasTodo {
// Start some upload requests
- if next_server < len(sv) {
- DebugPrintf("DEBUG: [%s] Begin upload %s to %s", reqid, hash, sv[next_server])
- go this.uploadToKeepServer(sv[next_server], hash, getReader(), upload_status, expectedLength, reqid)
- next_server += 1
+ if nextServer < len(sv) {
+ DebugPrintf("DEBUG: [%s] Begin upload %s to %s", reqid, hash, sv[nextServer])
+ go this.uploadToKeepServer(sv[nextServer], hash, getReader(), upload_status, expectedLength, reqid)
+ nextServer += 1
active += 1
} else {
if active == 0 && retriesRemaining == 0 {
}
msg = msg[:len(msg)-2]
return locator, replicasDone, InsufficientReplicasError(errors.New(msg))
- } else {
- break
}
+ break
}
}
DebugPrintf("DEBUG: [%s] Replicas remaining to write: %v active uploads: %v",
find_or_create_by(url_prefix: api_client_url_prefix)
end
+ token_expiration = nil
+ if Rails.configuration.Login.TokenLifetime > 0
+ token_expiration = Time.now + Rails.configuration.Login.TokenLifetime
+ end
@api_client_auth = ApiClientAuthorization.
new(user: user,
api_client: @api_client,
created_by_ip_address: remote_ip,
+ expires_at: token_expiration,
scopes: ["all"])
@api_client_auth.save!
end
def is_trusted
- norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench1.ExternalURL) ||
- norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench2.ExternalURL) ||
- super
+ (from_trusted_url && Rails.configuration.Login.TokenLifetime == 0) || super
end
protected
+ def from_trusted_url
+ norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench1.ExternalURL) ||
+ norm(self.url_prefix) == norm(Rails.configuration.Services.Workbench2.ExternalURL)
+ end
+
def norm url
# normalize URL for comparison
url = URI(url)
return ApiClientAuthorization.new(user: User.find_by_uuid(system_user_uuid),
uuid: Rails.configuration.ClusterID+"-gj3su-000000000000000",
api_token: token,
- api_client: ApiClient.new(is_trusted: true, url_prefix: ""))
+ api_client: system_root_token_api_client)
else
return nil
end
anonymous_group
anonymous_group_read_permission
anonymous_user
+ system_root_token_api_client
empty_collection
refresh_permissions
refresh_trashed
require "rails/test_unit/railtie"
# Skipping the following:
# * ActionCable (new in Rails 5.0) as it adds '/cable' routes that we're not using
-# * Skip ActiveStorage (new in Rails 5.1)
+# * ActiveStorage (new in Rails 5.1)
require 'digest'
arvcfg.declare_config "Login.SSO.ProviderAppID", String, :sso_app_id
arvcfg.declare_config "Login.LoginCluster", String
arvcfg.declare_config "Login.RemoteTokenRefresh", ActiveSupport::Duration
+arvcfg.declare_config "Login.TokenLifetime", ActiveSupport::Duration
arvcfg.declare_config "TLS.Insecure", Boolean, :sso_insecure
arvcfg.declare_config "Services.SSO.ExternalURL", String, :sso_provider_url
arvcfg.declare_config "AuditLogs.MaxAge", ActiveSupport::Duration, :max_audit_log_age
end
end
+ def system_root_token_api_client
+ $system_root_token_api_client = check_cache $system_root_token_api_client do
+ act_as_system_user do
+ ActiveRecord::Base.transaction do
+ ApiClient.find_or_create_by!(is_trusted: true, url_prefix: "", name: "SystemRootToken")
+ end
+ end
+ end
+ end
+
def empty_collection_pdh
'd41d8cd98f00b204e9800998ecf8427e+0'
end
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# Tasks that can be useful when changing token expiration policies by assigning
+# a non-zero value to Login.TokenLifetime config.
+
+require 'set'
+require 'current_api_client'
+
+namespace :db do
+ desc "Apply expiration policy on long lived tokens"
+ task fix_long_lived_tokens: :environment do
+ if Rails.configuration.Login.TokenLifetime == 0
+ puts("No expiration policy set on Login.TokenLifetime.")
+ else
+ exp_date = Time.now + Rails.configuration.Login.TokenLifetime
+ puts("Setting token expiration to: #{exp_date}")
+ token_count = 0
+ ll_tokens.each do |auth|
+ if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+ CurrentApiClientHelper.act_as_system_user do
+ auth.update_attributes!(expires_at: exp_date)
+ end
+ token_count += 1
+ end
+ end
+ puts("#{token_count} tokens updated.")
+ end
+ end
+
+ desc "Show users with long lived tokens"
+ task check_long_lived_tokens: :environment do
+ user_ids = Set.new()
+ token_count = 0
+ ll_tokens.each do |auth|
+ if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+ user_ids.add(auth.user_id)
+ token_count += 1
+ end
+ end
+
+ if user_ids.size > 0
+ puts("Found #{token_count} long-lived tokens from users:")
+ user_ids.each do |uid|
+ u = User.find(uid)
+ puts("#{u.username},#{u.email},#{u.uuid}") if !u.nil?
+ end
+ else
+ puts("No long-lived tokens found.")
+ end
+ end
+
+ def ll_tokens
+ query = ApiClientAuthorization.where(expires_at: nil)
+ if Rails.configuration.Login.TokenLifetime > 0
+ query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + Rails.configuration.Login.TokenLifetime))
+ end
+ query
+ end
+end
name: Untrusted
url_prefix: https://untrusted.local/
is_trusted: false
+
+system_root_token_api_client:
+ uuid: zzzzz-ozdt8-pbw7foaks3qjyej
+ owner_uuid: zzzzz-tpzed-000000000000000
+ name: SystemRootToken
+ url_prefix: ""
+ is_trusted: true
assert_nil assigns(:api_client)
end
-
test "send token when user is already logged in" do
authorize_with :inactive
api_client_page = 'http://client.example.com/home'
assert_not_nil assigns(:api_client)
end
+ test "login creates token without expiration by default" do
+ assert_equal Rails.configuration.Login.TokenLifetime, 0
+ authorize_with :inactive
+ api_client_page = 'http://client.example.com/home'
+ get :login, params: {return_to: api_client_page}
+ assert_not_nil assigns(:api_client)
+ assert_nil assigns(:api_client_auth).expires_at
+ end
+
+ test "login creates token with configured lifetime" do
+ token_lifetime = 1.hour
+ Rails.configuration.Login.TokenLifetime = token_lifetime
+ authorize_with :inactive
+ api_client_page = 'http://client.example.com/home'
+ get :login, params: {return_to: api_client_page}
+ assert_not_nil assigns(:api_client)
+ api_client_auth = assigns(:api_client_auth)
+ assert_in_delta(api_client_auth.expires_at,
+ api_client_auth.updated_at + token_lifetime,
+ 1.second)
+ end
+
test "login with remote param returns a salted token" do
authorize_with :inactive
api_client_page = 'http://client.example.com/home'
assert_response :success
end
- test "create token for different user" do
- post "/arvados/v1/api_client_authorizations",
- params: {
- :format => :json,
- :api_client_authorization => {
- :owner_uuid => users(:spectator).uuid
- }
- },
- headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{api_client_authorizations(:admin_trustedclient).api_token}"}
- assert_response :success
+ [:admin_trustedclient, :SystemRootToken].each do |tk|
+ test "create token for different user using #{tk}" do
+ if tk == :SystemRootToken
+ token = "xyzzy-SystemRootToken"
+ Rails.configuration.SystemRootToken = token
+ else
+ token = api_client_authorizations(tk).api_token
+ end
+
+ post "/arvados/v1/api_client_authorizations",
+ params: {
+ :format => :json,
+ :api_client_authorization => {
+ :owner_uuid => users(:spectator).uuid
+ }
+ },
+ headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{token}"}
+ assert_response :success
+
+ get "/arvados/v1/users/current",
+ params: {:format => :json},
+ headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{json_response['api_token']}"}
+ @json_response = nil
+ assert_equal json_response['uuid'], users(:spectator).uuid
+ end
+ end
+ test "System root token is system user" do
+ token = "xyzzy-SystemRootToken"
+ Rails.configuration.SystemRootToken = token
get "/arvados/v1/users/current",
- params: {:format => :json},
- headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{json_response['api_token']}"}
- @json_response = nil
- assert_equal users(:spectator).uuid, json_response['uuid']
+ params: {:format => :json},
+ headers: {'HTTP_AUTHORIZATION' => "OAuth2 #{token}"}
+ assert_equal json_response['uuid'], system_user_uuid
end
test "refuse to create token for different user if not trusted client" do
class ApiClientTest < ActiveSupport::TestCase
include CurrentApiClient
- test "configured workbench is trusted" do
- Rails.configuration.Services.Workbench1.ExternalURL = URI("http://wb1.example.com")
- Rails.configuration.Services.Workbench2.ExternalURL = URI("https://wb2.example.com:443")
+ [true, false].each do |token_lifetime_enabled|
+ test "configured workbench is trusted when token lifetime is#{token_lifetime_enabled ? '': ' not'} enabled" do
+ Rails.configuration.Login.TokenLifetime = token_lifetime_enabled ? 8.hours : 0
+ Rails.configuration.Services.Workbench1.ExternalURL = URI("http://wb1.example.com")
+ Rails.configuration.Services.Workbench2.ExternalURL = URI("https://wb2.example.com:443")
- act_as_system_user do
- [["http://wb0.example.com", false],
- ["http://wb1.example.com", true],
- ["http://wb2.example.com", false],
- ["https://wb2.example.com", true],
- ["https://wb2.example.com/", true],
- ].each do |pfx, result|
- a = ApiClient.create(url_prefix: pfx, is_trusted: false)
- assert_equal result, a.is_trusted
- end
+ act_as_system_user do
+ [["http://wb0.example.com", false],
+ ["http://wb1.example.com", true],
+ ["http://wb2.example.com", false],
+ ["https://wb2.example.com", true],
+ ["https://wb2.example.com/", true],
+ ].each do |pfx, result|
+ a = ApiClient.create(url_prefix: pfx, is_trusted: false)
+ if token_lifetime_enabled
+ assert_equal false, a.is_trusted, "API client with url prefix '#{pfx}' shouldn't be trusted"
+ else
+ assert_equal result, a.is_trusted
+ end
+ end
- a = ApiClient.create(url_prefix: "http://example.com", is_trusted: true)
- a.save!
- a.reload
- assert a.is_trusted
+ a = ApiClient.create(url_prefix: "http://example.com", is_trusted: true)
+ a.save!
+ a.reload
+ assert a.is_trusted
+ end
end
end
end
s.cluster, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
- s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: "localhost:0"}: {}}
s.cluster.TLS.Insecure = true
s.cluster.Git.GitCommand = "/usr/bin/git"
s.cluster.Git.Repositories = repoRoot
s.cluster, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
- s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:80"}: arvados.ServiceInstance{}}
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: "localhost:80"}: {}}
s.cluster.Git.GitoliteHome = "/test/ghh"
s.cluster.Git.Repositories = "/"
}
s.cluster, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
- s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: "localhost:0"}: {}}
s.cluster.TLS.Insecure = true
s.cluster.Git.GitCommand = "/usr/share/gitolite3/gitolite-shell"
s.cluster.Git.GitoliteHome = s.gitoliteHome
s.cluster, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
- s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: "localhost:0"}: {}}
s.cluster.TLS.Insecure = true
s.cluster.Git.GitCommand = "/usr/bin/git"
s.cluster.Git.Repositories = s.tmpRepoRoot
+++ /dev/null
-arv-web enables you to run a custom web service using the contents of an
-Arvados collection.
-
-See "Using arv-web" in the Arvados user guide:
-
-http://doc.arvados.org/user/topics/arv-web.html
+++ /dev/null
-#!/usr/bin/env python
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-# arv-web enables you to run a custom web service from the contents of an Arvados collection.
-#
-# See http://doc.arvados.org/user/topics/arv-web.html
-
-import arvados
-from arvados.safeapi import ThreadSafeApiCache
-import subprocess
-from arvados_fuse import Operations, CollectionDirectory
-import tempfile
-import os
-import llfuse
-import threading
-import Queue
-import argparse
-import logging
-import signal
-import sys
-import functools
-
-logger = logging.getLogger('arvados.arv-web')
-logger.setLevel(logging.INFO)
-
-class ArvWeb(object):
- def __init__(self, project, docker_image, port):
- self.project = project
- self.loop = True
- self.cid = None
- self.prev_docker_image = None
- self.mountdir = None
- self.collection = None
- self.override_docker_image = docker_image
- self.port = port
- self.evqueue = Queue.Queue()
- self.api = ThreadSafeApiCache(arvados.config.settings())
-
- if arvados.util.group_uuid_pattern.match(project) is None:
- raise arvados.errors.ArgumentError("Project uuid is not valid")
-
- collections = self.api.collections().list(filters=[["owner_uuid", "=", project]],
- limit=1,
- order='modified_at desc').execute()['items']
- self.newcollection = collections[0]['uuid'] if collections else None
-
- self.ws = arvados.events.subscribe(self.api, [["object_uuid", "is_a", "arvados#collection"]], self.on_message)
-
- def check_docker_running(self):
- # It would be less hacky to use "docker events" than poll "docker ps"
- # but that would require writing a bigger pile of code.
- if self.cid:
- ps = subprocess.check_output(["docker", "ps", "--no-trunc=true", "--filter=status=running"])
- for l in ps.splitlines():
- if l.startswith(self.cid):
- return True
- return False
-
- # Handle messages from Arvados event bus.
- def on_message(self, ev):
- if 'event_type' in ev:
- old_attr = None
- if 'old_attributes' in ev['properties'] and ev['properties']['old_attributes']:
- old_attr = ev['properties']['old_attributes']
- if self.project not in (ev['properties']['new_attributes']['owner_uuid'],
- old_attr['owner_uuid'] if old_attr else None):
- return
-
- et = ev['event_type']
- if ev['event_type'] == 'update':
- if ev['properties']['new_attributes']['owner_uuid'] != ev['properties']['old_attributes']['owner_uuid']:
- if self.project == ev['properties']['new_attributes']['owner_uuid']:
- et = 'add'
- else:
- et = 'remove'
- if ev['properties']['new_attributes']['trash_at'] is not None:
- et = 'remove'
-
- self.evqueue.put((self.project, et, ev['object_uuid']))
-
- # Run an arvados_fuse mount under the control of the local process. This lets
- # us switch out the contents of the directory without having to unmount and
- # remount.
- def run_fuse_mount(self):
- self.mountdir = tempfile.mkdtemp()
-
- self.operations = Operations(os.getuid(), os.getgid(), self.api, "utf-8")
- self.cdir = CollectionDirectory(llfuse.ROOT_INODE, self.operations.inodes, self.api, 2, self.collection)
- self.operations.inodes.add_entry(self.cdir)
-
- # Initialize the fuse connection
- llfuse.init(self.operations, self.mountdir, ['allow_other'])
-
- t = threading.Thread(None, llfuse.main)
- t.start()
-
- # wait until the driver is finished initializing
- self.operations.initlock.wait()
-
- def mount_collection(self):
- if self.newcollection != self.collection:
- self.collection = self.newcollection
- if not self.mountdir and self.collection:
- self.run_fuse_mount()
-
- if self.mountdir:
- with llfuse.lock:
- self.cdir.clear()
- # Switch the FUSE directory object so that it stores
- # the newly selected collection
- if self.collection:
- logger.info("Mounting %s", self.collection)
- else:
- logger.info("Mount is empty")
- self.cdir.change_collection(self.collection)
-
-
- def stop_docker(self):
- if self.cid:
- logger.info("Stopping Docker container")
- subprocess.call(["docker", "stop", self.cid])
- self.cid = None
-
- def run_docker(self):
- try:
- if self.collection is None:
- self.stop_docker()
- return
-
- docker_image = None
- if self.override_docker_image:
- docker_image = self.override_docker_image
- else:
- try:
- with llfuse.lock:
- if "docker_image" in self.cdir:
- docker_image = self.cdir["docker_image"].readfrom(0, 1024).strip()
- except IOError as e:
- pass
-
- has_reload = False
- try:
- with llfuse.lock:
- has_reload = "reload" in self.cdir
- except IOError as e:
- pass
-
- if docker_image is None:
- logger.error("Collection must contain a file 'docker_image' or must specify --image on the command line.")
- self.stop_docker()
- return
-
- if docker_image == self.prev_docker_image and self.cid is not None and has_reload:
- logger.info("Running container reload command")
- subprocess.check_call(["docker", "exec", self.cid, "/mnt/reload"])
- return
-
- self.stop_docker()
-
- logger.info("Starting Docker container %s", docker_image)
- self.cid = subprocess.check_output(["docker", "run",
- "--detach=true",
- "--publish=%i:80" % (self.port),
- "--volume=%s:/mnt:ro" % self.mountdir,
- docker_image]).strip()
-
- self.prev_docker_image = docker_image
- logger.info("Container id %s", self.cid)
-
- except subprocess.CalledProcessError:
- self.cid = None
-
- def wait_for_events(self):
- if not self.cid:
- logger.warning("No service running! Will wait for a new collection to appear in the project.")
- else:
- logger.info("Waiting for events")
-
- running = True
- self.loop = True
- while running:
- # Main run loop. Wait on project events, signals, or the
- # Docker container stopping.
-
- try:
- # Poll the queue with a 1 second timeout, if we have no
- # timeout the Python runtime doesn't have a chance to
- # process SIGINT or SIGTERM.
- eq = self.evqueue.get(True, 1)
- logger.info("%s %s", eq[1], eq[2])
- self.newcollection = self.collection
- if eq[1] in ('add', 'update', 'create'):
- self.newcollection = eq[2]
- elif eq[1] == 'remove':
- collections = self.api.collections().list(filters=[["owner_uuid", "=", self.project]],
- limit=1,
- order='modified_at desc').execute()['items']
- self.newcollection = collections[0]['uuid'] if collections else None
- running = False
- except Queue.Empty:
- pass
-
- if self.cid and not self.check_docker_running():
- logger.warning("Service has terminated. Will try to restart.")
- self.cid = None
- running = False
-
-
- def run(self):
- try:
- while self.loop:
- self.loop = False
- self.mount_collection()
- try:
- self.run_docker()
- self.wait_for_events()
- except (KeyboardInterrupt):
- logger.info("Got keyboard interrupt")
- self.ws.close()
- self.loop = False
- except Exception as e:
- logger.exception("Caught fatal exception, shutting down")
- self.ws.close()
- self.loop = False
- finally:
- self.stop_docker()
-
- if self.mountdir:
- logger.info("Unmounting")
- subprocess.call(["fusermount", "-u", self.mountdir])
- os.rmdir(self.mountdir)
-
-
-def main(argv):
- parser = argparse.ArgumentParser()
- parser.add_argument('--project-uuid', type=str, required=True, help="Project uuid to watch")
- parser.add_argument('--port', type=int, default=8080, help="Host port to listen on (default 8080)")
- parser.add_argument('--image', type=str, help="Docker image to run")
-
- args = parser.parse_args(argv)
-
- signal.signal(signal.SIGTERM, lambda signal, frame: sys.exit(0))
-
- try:
- arvweb = ArvWeb(args.project_uuid, args.image, args.port)
- arvweb.run()
- except arvados.errors.ArgumentError as e:
- logger.error(e)
- return 1
-
- return 0
-
-if __name__ == '__main__':
- sys.exit(main(sys.argv[1:]))
+++ /dev/null
-arvados/arv-web
\ No newline at end of file
+++ /dev/null
-Options +ExecCGI
-AddHandler cgi-script .cgi
-DirectoryIndex index.cgi
+++ /dev/null
-#!/usr/bin/perl
-
-print "Content-type: text/html\n\n";
-print "Hello world from perl!";
+++ /dev/null
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-app = proc do |env|
- [200, { "Content-Type" => "text/html" }, ["hello <b>world</b> from ruby"]]
-end
-run app
+++ /dev/null
-arvados/arv-web
\ No newline at end of file
+++ /dev/null
-arvados/arv-web
\ No newline at end of file
+++ /dev/null
-<!-- Copyright (C) The Arvados Authors. All rights reserved.
-
-SPDX-License-Identifier: AGPL-3.0 -->
-
-<html>
- <head><title>arv-web sample</title></head>
- <body>
- <p>Hello world static page</p>
- </body>
-</html>
+++ /dev/null
-arvados/arv-web
\ No newline at end of file
+++ /dev/null
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-def application(environ, start_response):
- start_response('200 OK', [('Content-Type', 'text/plain')])
- return [b"hello world from python!\n"]
from future.utils import viewitems
from future.utils import itervalues
from builtins import dict
-import logging
-import re
-import time
-import llfuse
-import arvados
import apiclient
+import arvados
+import errno
import functools
+import llfuse
+import logging
+import re
+import sys
import threading
-from apiclient import errors as apiclient_errors
-import errno
import time
+from apiclient import errors as apiclient_errors
from .fusefile import StringFile, ObjectFile, FuncToJSONFile, FuseArvadosFile
from .fresh import FreshBase, convertTime, use_counter, check_update
e = self.inodes.add_entry(ProjectDirectory(
self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0]))
else:
- import sys
e = self.inodes.add_entry(CollectionDirectory(
self.inode, self.inodes, self.api, self.num_retries, k))
import errno
import os
import subprocess
+import sys
import time
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
h.setupOnce.Do(h.setup)
- remoteAddr := r.RemoteAddr
- if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
- remoteAddr = xff + "," + remoteAddr
- }
if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
r.URL.Scheme = xfp
}
}
w.WriteHeader(http.StatusOK)
return true
+ case r.Method == http.MethodDelete:
+ if !objectNameGiven || r.URL.Path == "/" {
+ http.Error(w, "missing object name in DELETE request", http.StatusBadRequest)
+ return true
+ }
+ fspath := "by_id" + r.URL.Path
+ if strings.HasSuffix(fspath, "/") {
+ fspath = strings.TrimSuffix(fspath, "/")
+ fi, err := fs.Stat(fspath)
+ if os.IsNotExist(err) {
+ w.WriteHeader(http.StatusNoContent)
+ return true
+ } else if err != nil {
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return true
+ } else if !fi.IsDir() {
+ // if "foo" exists and is a file, then
+ // "foo/" doesn't exist, so we say
+ // delete was successful.
+ w.WriteHeader(http.StatusNoContent)
+ return true
+ }
+ } else if fi, err := fs.Stat(fspath); err == nil && fi.IsDir() {
+ // if "foo" is a dir, it is visible via S3
+ // only as "foo/", not "foo" -- so we leave
+ // the dir alone and return 204 to indicate
+ // that "foo" does not exist.
+ w.WriteHeader(http.StatusNoContent)
+ return true
+ }
+ err = fs.Remove(fspath)
+ if os.IsNotExist(err) {
+ w.WriteHeader(http.StatusNoContent)
+ return true
+ }
+ if err != nil {
+ err = fmt.Errorf("rm failed: %w", err)
+ http.Error(w, err.Error(), http.StatusBadRequest)
+ return true
+ }
+ err = fs.Sync()
+ if err != nil {
+ err = fmt.Errorf("sync failed: %w", err)
+ http.Error(w, err.Error(), http.StatusInternalServerError)
+ return true
+ }
+ w.WriteHeader(http.StatusNoContent)
+ return true
default:
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return true
walkpath = ""
}
- resp := s3.ListResp{
- Name: strings.SplitN(r.URL.Path[1:], "/", 2)[0],
- Prefix: params.prefix,
- Delimiter: params.delimiter,
- Marker: params.marker,
- MaxKeys: params.maxKeys,
+ type commonPrefix struct {
+ Prefix string
+ }
+ type listResp struct {
+ XMLName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
+ s3.ListResp
+ // s3.ListResp marshals an empty tag when
+ // CommonPrefixes is nil, which confuses some clients.
+ // Fix by using this nested struct instead.
+ CommonPrefixes []commonPrefix
+ }
+ resp := listResp{
+ ListResp: s3.ListResp{
+ Name: strings.SplitN(r.URL.Path[1:], "/", 2)[0],
+ Prefix: params.prefix,
+ Delimiter: params.delimiter,
+ Marker: params.marker,
+ MaxKeys: params.maxKeys,
+ },
}
commonPrefixes := map[string]bool{}
err := walkFS(fs, strings.TrimSuffix(bucketdir+"/"+walkpath, "/"), true, func(path string, fi os.FileInfo) error {
return
}
if params.delimiter != "" {
+ resp.CommonPrefixes = make([]commonPrefix, 0, len(commonPrefixes))
for prefix := range commonPrefixes {
- resp.CommonPrefixes = append(resp.CommonPrefixes, prefix)
- sort.Strings(resp.CommonPrefixes)
+ resp.CommonPrefixes = append(resp.CommonPrefixes, commonPrefix{prefix})
}
+ sort.Slice(resp.CommonPrefixes, func(i, j int) bool { return resp.CommonPrefixes[i].Prefix < resp.CommonPrefixes[j].Prefix })
}
- wrappedResp := struct {
- XMLName string `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult"`
- s3.ListResp
- }{"", resp}
w.Header().Set("Content-Type", "application/xml")
io.WriteString(w, xml.Header)
- if err := xml.NewEncoder(w).Encode(wrappedResp); err != nil {
+ if err := xml.NewEncoder(w).Encode(resp); err != nil {
ctxlog.FromContext(r.Context()).WithError(err).Error("error writing xml response")
}
}
}
}
+func (s *IntegrationSuite) TestS3CollectionDeleteObject(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+ s.testS3DeleteObject(c, stage.collbucket, "")
+}
+func (s *IntegrationSuite) TestS3ProjectDeleteObject(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+ s.testS3DeleteObject(c, stage.projbucket, stage.coll.Name+"/")
+}
+func (s *IntegrationSuite) testS3DeleteObject(c *check.C, bucket *s3.Bucket, prefix string) {
+ s.testServer.Config.cluster.Collections.S3FolderObjects = true
+ for _, trial := range []struct {
+ path string
+ }{
+ {"/"},
+ {"nonexistentfile"},
+ {"emptyfile"},
+ {"sailboat.txt"},
+ {"sailboat.txt/"},
+ {"emptydir"},
+ {"emptydir/"},
+ } {
+ objname := prefix + trial.path
+ comment := check.Commentf("objname %q", objname)
+
+ err := bucket.Del(objname)
+ if trial.path == "/" {
+ c.Check(err, check.NotNil)
+ continue
+ }
+ c.Check(err, check.IsNil, comment)
+ _, err = bucket.GetReader(objname)
+ c.Check(err, check.NotNil, comment)
+ }
+}
+
func (s *IntegrationSuite) TestS3CollectionPutObjectFailure(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)
defer stage.teardown(c)
for _, bucket := range []*s3.Bucket{stage.collbucket, stage.projbucket} {
req, err := http.NewRequest("GET", bucket.URL("/"), nil)
+ c.Check(err, check.IsNil)
req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
req.URL.RawQuery = "versioning"
resp, err := http.DefaultClient.Do(req)
}
}
+// If there are no CommonPrefixes entries, the CommonPrefixes XML tag
+// should not appear at all.
+func (s *IntegrationSuite) TestS3ListNoCommonPrefixes(c *check.C) {
+ stage := s.s3setup(c)
+ defer stage.teardown(c)
+
+ req, err := http.NewRequest("GET", stage.collbucket.URL("/"), nil)
+ c.Assert(err, check.IsNil)
+ req.Header.Set("Authorization", "AWS "+arvadostest.ActiveTokenV2+":none")
+ req.URL.RawQuery = "prefix=asdfasdfasdf&delimiter=/"
+ resp, err := http.DefaultClient.Do(req)
+ c.Assert(err, check.IsNil)
+ buf, err := ioutil.ReadAll(resp.Body)
+ c.Assert(err, check.IsNil)
+ c.Check(string(buf), check.Not(check.Matches), `(?ms).*CommonPrefixes.*`)
+}
+
func (s *IntegrationSuite) TestS3CollectionList(c *check.C) {
stage := s.s3setup(c)
defer stage.teardown(c)
cluster.Services.Keepstore.InternalURLs = make(map[arvados.URL]arvados.ServiceInstance)
}
- cluster.Services.Keepproxy.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: ":0"}: arvados.ServiceInstance{}}
+ cluster.Services.Keepproxy.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: ":0"}: {}}
listener = nil
go func() {
s.cluster.Collections.BlobSigningKey = knownKey
s.cluster.SystemRootToken = arvadostest.SystemRootToken
s.cluster.RemoteClusters = map[string]arvados.RemoteCluster{
- s.remoteClusterID: arvados.RemoteCluster{
+ s.remoteClusterID: {
Host: strings.Split(s.remoteAPI.URL, "//")[1],
Proxy: true,
Scheme: "http",
cluster.TLS.Insecure = client.Insecure
cluster.PostgreSQL.Connection = testDBConfig()
cluster.PostgreSQL.ConnectionPool = 12
- cluster.Services.Websocket.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: ":"}: arvados.ServiceInstance{}}
+ cluster.Services.Websocket.InternalURLs = map[arvados.URL]arvados.ServiceInstance{{Host: ":"}: {}}
cluster.ManagementToken = arvadostest.ManagementToken
return cluster, nil
}
ARVADOS_ROOT="$ARVBOX_DATA/arvados"
fi
-if test -z "$SSO_ROOT" ; then
- SSO_ROOT="$ARVBOX_DATA/sso-devise-omniauth-provider"
-fi
-
if test -z "$COMPOSER_ROOT" ; then
COMPOSER_ROOT="$ARVBOX_DATA/composer"
fi
docker_run_dev() {
docker run \
"--volume=$ARVADOS_ROOT:/usr/src/arvados:rw" \
- "--volume=$SSO_ROOT:/usr/src/sso:rw" \
"--volume=$COMPOSER_ROOT:/usr/src/composer:rw" \
"--volume=$WORKBENCH2_ROOT:/usr/src/workbench2:rw" \
"--volume=$PG_DATA:/var/lib/postgresql:rw" \
if ! test -d "$ARVADOS_ROOT" ; then
git clone https://git.arvados.org/arvados.git "$ARVADOS_ROOT"
fi
- if ! test -d "$SSO_ROOT" ; then
- git clone https://github.com/arvados/sso-devise-omniauth-provider.git "$SSO_ROOT"
- fi
if ! test -d "$COMPOSER_ROOT" ; then
git clone https://github.com/arvados/composer.git "$COMPOSER_ROOT"
git -C "$COMPOSER_ROOT" checkout arvados-fork
git -C "$COMPOSER_ROOT" pull
fi
if ! test -d "$WORKBENCH2_ROOT" ; then
- git clone https://github.com/arvados/arvados-workbench2.git "$WORKBENCH2_ROOT"
+ git clone https://git.arvados.org/arvados-workbench2.git "$WORKBENCH2_ROOT"
fi
if [[ "$CONFIG" = test ]] ; then
/usr/local/lib/arvbox/runsu.sh \
/usr/local/lib/arvbox/waitforpostgres.sh
- docker exec -ti \
- $ARVBOX_CONTAINER \
- /usr/local/lib/arvbox/runsu.sh \
- /var/lib/arvbox/service/sso/run-service --only-setup
-
docker exec -ti \
$ARVBOX_CONTAINER \
/usr/local/lib/arvbox/runsu.sh \
exit 1
fi
set -x
+ chmod -R u+w "$ARVBOX_DATA"
rm -rf "$ARVBOX_DATA"
else
if test "$1" != -f ; then
"$ARVBOX_BASE/$1/gopath" \
"$ARVBOX_BASE/$1/Rlibs" \
"$ARVBOX_BASE/$1/arvados" \
- "$ARVBOX_BASE/$1/sso-devise-omniauth-provider" \
"$ARVBOX_BASE/$1/composer" \
"$ARVBOX_BASE/$1/workbench2" \
"$ARVBOX_BASE/$2"
EOF
;;
+ adduser)
+ docker exec -ti $ARVBOX_CONTAINER /usr/local/lib/arvbox/edit_users.py /var/lib/arvados/cluster_config.yml.override $(getclusterid) add $@
+ docker exec $ARVBOX_CONTAINER sv restart controller
+ ;;
+
+ removeuser)
+ docker exec -ti $ARVBOX_CONTAINER /usr/local/lib/arvbox/edit_users.py /var/lib/arvados/cluster_config.yml.override $(getclusterid) remove $@
+ docker exec $ARVBOX_CONTAINER sv restart controller
+ ;;
+
+ listusers)
+ exec docker exec -ti $ARVBOX_CONTAINER /usr/local/lib/arvbox/edit_users.py /var/lib/arvados/cluster_config.yml $(getclusterid) list
+ ;;
+
*)
echo "Arvados-in-a-box https://doc.arvados.org/install/arvbox.html"
echo
echo "sv <start|stop|restart> <service> "
echo " change state of service inside arvbox"
echo "clone <from> <to> clone dev arvbox"
+ echo "adduser <username> <email>"
+ echo " add a user login"
+ echo "removeuser <username>"
+ echo " remove user login"
+ echo "listusers list user logins"
;;
esac
keep-setup.sh common.sh createusers.sh \
logger runsu.sh waitforpostgres.sh \
yml_override.py api-setup.sh \
- go-setup.sh devenv.sh cluster-config.sh \
+ go-setup.sh devenv.sh cluster-config.sh edit_users.py \
/usr/local/lib/arvbox/
ADD runit /etc/runit
FROM arvados/arvbox-base
ARG arvados_version
-ARG sso_version=master
ARG composer_version=arvados-fork
ARG workbench2_version=master
RUN cd /usr/src && \
- git clone --no-checkout https://github.com/arvados/arvados.git && \
+ git clone --no-checkout https://git.arvados.org/arvados.git && \
git -C arvados checkout ${arvados_version} && \
git -C arvados pull && \
- git clone --no-checkout https://github.com/arvados/sso-devise-omniauth-provider.git sso && \
- git -C sso checkout ${sso_version} && \
- git -C sso pull && \
git clone --no-checkout https://github.com/arvados/composer.git && \
git -C composer checkout ${composer_version} && \
git -C composer pull && \
- git clone --no-checkout https://github.com/arvados/arvados-workbench2.git workbench2 && \
+ git clone --no-checkout https://git.arvados.org/arvados-workbench2.git workbench2 && \
git -C workbench2 checkout ${workbench2_version} && \
git -C workbench2 pull && \
chown -R 1000:1000 /usr/src
RUN ln -sf /var/lib/arvbox/service /etc
RUN mkdir -p /var/lib/arvados
RUN echo "production" > /var/lib/arvados/api_rails_env
-RUN echo "production" > /var/lib/arvados/sso_rails_env
RUN echo "production" > /var/lib/arvados/workbench_rails_env
RUN /usr/local/lib/arvbox/createusers.sh
RUN sudo -u arvbox /var/lib/arvbox/service/composer/run-service --only-deps
RUN sudo -u arvbox /var/lib/arvbox/service/workbench2/run-service --only-deps
RUN sudo -u arvbox /var/lib/arvbox/service/keep-web/run-service --only-deps
-RUN sudo -u arvbox /var/lib/arvbox/service/sso/run-service --only-deps
RUN sudo -u arvbox /var/lib/arvbox/service/workbench/run-service --only-deps
RUN sudo -u arvbox /var/lib/arvbox/service/doc/run-service --only-deps
RUN sudo -u arvbox /var/lib/arvbox/service/vm/run-service --only-deps
RUN ln -sf /var/lib/arvbox/service /etc
RUN mkdir -p /var/lib/arvados
RUN echo "development" > /var/lib/arvados/api_rails_env
-RUN echo "development" > /var/lib/arvados/sso_rails_env
RUN echo "development" > /var/lib/arvados/workbench_rails_env
RUN mkdir /etc/test-service && \
secret_token=$(cat /var/lib/arvados/api_secret_token)
blob_signing_key=$(cat /var/lib/arvados/blob_signing_key)
management_token=$(cat /var/lib/arvados/management_token)
- sso_app_secret=$(cat /var/lib/arvados/sso_app_secret)
database_pw=$(cat /var/lib/arvados/api_database_pw)
vm_uuid=$(cat /var/lib/arvados/vm-uuid)
uuid_prefix: $uuid_prefix
secret_token: $secret_token
blob_signing_key: $blob_signing_key
- sso_app_secret: $sso_app_secret
- sso_app_id: arvados-server
- sso_provider_url: "https://$localip:${services[sso]}"
- sso_insecure: false
workbench_address: "https://$localip/"
websocket_address: "wss://$localip:${services[websockets-ssl]}/websocket"
git_repo_ssh_base: "git@$localip:"
fi
system_root_token=$(cat /var/lib/arvados/system_root_token)
-if ! test -s /var/lib/arvados/sso_app_secret ; then
- ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/sso_app_secret
-fi
-sso_app_secret=$(cat /var/lib/arvados/sso_app_secret)
-
if ! test -s /var/lib/arvados/vm-uuid ; then
echo $uuid_prefix-2x53u-$(ruby -e 'puts rand(2**400).to_s(36)[0,15]') > /var/lib/arvados/vm-uuid
fi
ExternalURL: "https://$localip:${services[workbench]}"
Workbench2:
ExternalURL: "https://$localip:${services[workbench2-ssl]}"
- SSO:
- ExternalURL: "https://$localip:${services[sso]}"
Keepproxy:
ExternalURL: "https://$localip:${services[keepproxy-ssl]}"
InternalURLs:
InternalURLs:
"http://localhost:${services[keep-web]}/": {}
ExternalURL: "https://$localip:${services[keep-web-ssl]}/"
- InternalURLs:
- "http://localhost:${services[keep-web]}/": {}
Composer:
ExternalURL: "https://$localip:${services[composer]}"
Controller:
DefaultReplication: 1
TrustAllContent: true
Login:
- SSO:
+ Test:
Enable: true
- ProviderAppSecret: $sso_app_secret
- ProviderAppID: arvados-server
Users:
NewUsersAreActive: true
AutoAdminFirstUser: true
cp /var/lib/arvados/cluster_config.yml /etc/arvados/config.yml
+chmod og-rw \
+ /var/lib/arvados/cluster_config.yml.override \
+ /var/lib/arvados/cluster_config.yml \
+ /etc/arvados/config.yml \
+ /var/lib/arvados/api_secret_token \
+ /var/lib/arvados/blob_signing_key \
+ /var/lib/arvados/management_token \
+ /var/lib/arvados/system_root_token \
+ /var/lib/arvados/api_database_pw \
+ /var/lib/arvados/workbench_secret_token \
+ /var/lib/arvados/superuser_token \
+
mkdir -p /var/lib/arvados/run_tests
cat >/var/lib/arvados/run_tests/config.yml <<EOF
Clusters:
[api]=8004
[controller]=8003
[controller-ssl]=8000
- [sso]=8900
[composer]=4200
[arv-git-httpd-ssl]=9000
[arv-git-httpd]=9001
--- /dev/null
+#!/usr/bin/env python3
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+import ruamel.yaml
+import sys
+import getpass
+import os
+
+def print_help():
+ print("%s <path/to/config.yaml> <clusterid> add <username> <email> [pass]" % (sys.argv[0]))
+ print("%s <path/to/config.yaml> <clusterid> remove <username>" % (" " * len(sys.argv[0])))
+ print("%s <path/to/config.yaml> <clusterid> list" % (" " * len(sys.argv[0])))
+ exit()
+
+if len(sys.argv) < 4:
+ print_help()
+
+fn = sys.argv[1]
+cl = sys.argv[2]
+op = sys.argv[3]
+
+if op == "remove" and len(sys.argv) < 5:
+ print_help()
+if op == "add" and len(sys.argv) < 6:
+ print_help()
+
+if op in ("add", "remove"):
+ user = sys.argv[4]
+
+if not os.path.exists(fn):
+ open(fn, "w").close()
+
+with open(fn, "r") as f:
+ conf = ruamel.yaml.round_trip_load(f)
+
+if not conf:
+ conf = {}
+
+conf["Clusters"] = conf.get("Clusters", {})
+conf["Clusters"][cl] = conf["Clusters"].get(cl, {})
+conf["Clusters"][cl]["Login"] = conf["Clusters"][cl].get("Login", {})
+conf["Clusters"][cl]["Login"]["Test"] = conf["Clusters"][cl]["Login"].get("Test", {})
+conf["Clusters"][cl]["Login"]["Test"]["Users"] = conf["Clusters"][cl]["Login"]["Test"].get("Users", {})
+
+users_obj = conf["Clusters"][cl]["Login"]["Test"]["Users"]
+
+if op == "add":
+ email = sys.argv[5]
+ if len(sys.argv) == 7:
+ p = sys.argv[6]
+ else:
+ p = getpass.getpass("Password for %s: " % user)
+
+ users_obj[user] = {
+ "Email": email,
+ "Password": p
+ }
+ print("Added %s" % user)
+elif op == "remove":
+ del users_obj[user]
+ print("Removed %s" % user)
+elif op == "list":
+ print(ruamel.yaml.round_trip_dump(users_obj))
+else:
+ print("Operations are 'add', 'remove' and 'list'")
+
+with open(fn, "w") as f:
+ f.write(ruamel.yaml.round_trip_dump(conf))
gemlockcount=0
for l in /usr/src/arvados/services/api/Gemfile.lock \
- /usr/src/arvados/apps/workbench/Gemfile.lock \
- /usr/src/sso/Gemfile.lock ; do
+ /usr/src/arvados/apps/workbench/Gemfile.lock ; do
gc=$(cat $l \
| grep -vE "(GEM|PLATFORMS|DEPENDENCIES|BUNDLED|GIT|$^|remote:|specs:|revision:)" \
| sed 's/^ *//' | sed 's/(.*)//' | sed 's/ *$//' | sort | uniq | wc -l)
+++ /dev/null
-/usr/local/lib/arvbox/logger
\ No newline at end of file
+++ /dev/null
-/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
+++ /dev/null
-#!/bin/bash
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-exec 2>&1
-set -ex -o pipefail
-
-. /usr/local/lib/arvbox/common.sh
-
-cd /usr/src/sso
-if test -s /var/lib/arvados/sso_rails_env ; then
- export RAILS_ENV=$(cat /var/lib/arvados/sso_rails_env)
-else
- export RAILS_ENV=development
-fi
-
-run_bundler --without=development
-bundle exec passenger-config build-native-support
-bundle exec passenger-config install-standalone-runtime
-
-if test "$1" = "--only-deps" ; then
- exit
-fi
-
-set -u
-
-uuid_prefix=$(cat /var/lib/arvados/api_uuid_prefix)
-
-if ! test -s /var/lib/arvados/sso_secret_token ; then
- ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/sso_secret_token
-fi
-secret_token=$(cat /var/lib/arvados/sso_secret_token)
-
-openssl verify -CAfile $root_cert $server_cert
-
-cat >config/application.yml <<EOF
-$RAILS_ENV:
- uuid_prefix: $uuid_prefix
- secret_token: $secret_token
- default_link_url: "http://$localip"
- allow_account_registration: true
-EOF
-
-(cd config && /usr/local/lib/arvbox/yml_override.py application.yml)
-
-if ! test -f /var/lib/arvados/sso_database_pw ; then
- ruby -e 'puts rand(2**128).to_s(36)' > /var/lib/arvados/sso_database_pw
-fi
-database_pw=$(cat /var/lib/arvados/sso_database_pw)
-
-if ! (psql postgres -c "\du" | grep "^ arvados_sso ") >/dev/null ; then
- psql postgres -c "create user arvados_sso with password '$database_pw'"
- psql postgres -c "ALTER USER arvados_sso CREATEDB;"
-fi
-
-sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml
-
-if ! test -f /var/lib/arvados/sso_database_setup ; then
- bundle exec rake db:setup
-
- app_secret=$(cat /var/lib/arvados/sso_app_secret)
-
- bundle exec rails console <<EOF
-c = Client.new
-c.name = "joshid"
-c.app_id = "arvados-server"
-c.app_secret = "$app_secret"
-c.save!
-EOF
-
- touch /var/lib/arvados/sso_database_setup
-fi
-
-rm -rf tmp
-mkdir -p tmp/cache
-
-bundle exec rake assets:precompile
-bundle exec rake db:migrate
-
-set +u
-if test "$1" = "--only-setup" ; then
- exit
-fi
-
-exec bundle exec passenger start --port=${services[sso]} \
- --ssl --ssl-certificate=/var/lib/arvados/server-cert-${localip}.pem \
- --ssl-certificate-key=/var/lib/arvados/server-cert-${localip}.key
{
"variables": {
- "storage_account": null,
"resource_group": null,
"client_id": "{{env `ARM_CLIENT_ID`}}",
"client_secret": "{{env `ARM_CLIENT_SECRET`}}",
"subscription_id": "{{user `subscription_id`}}",
"tenant_id": "{{user `tenant_id`}}",
- "resource_group_name": "{{user `resource_group`}}",
- "storage_account": "{{user `storage_account`}}",
-
- "capture_container_name": "images",
- "capture_name_prefix": "{{user `arvados_cluster`}}-compute",
+ "managed_image_resource_group_name": "{{user `resource_group`}}",
+ "managed_image_name": "{{user `arvados_cluster`}}-compute-v{{ timestamp }}",
"ssh_username": "{{user `ssh_user`}}",
"ssh_private_key_file": "{{user `ssh_private_key_file`}}",
Azure secrets file which will be sourced from this script
--azure-resource-group (default: false, required if building for Azure)
Azure resource group
- --azure-storage-account (default: false, required if building for Azure)
- Azure storage account
--azure-location (default: false, required if building for Azure)
Azure location, e.g. centralus, eastus, westeurope
--azure-sku (default: unset, required if building for Azure, e.g. 16.04-LTS)
GCP_ZONE=
AZURE_SECRETS_FILE=
AZURE_RESOURCE_GROUP=
-AZURE_STORAGE_ACCOUNT=
AZURE_LOCATION=
AZURE_CLOUD_ENVIRONMENT=
DEBUG=
PUBLIC_KEY_FILE=
PARSEDOPTS=$(getopt --name "$0" --longoptions \
- help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-storage-account:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
+ help,json-file:,arvados-cluster-id:,aws-source-ami:,aws-profile:,aws-secrets-file:,aws-region:,aws-vpc-id:,aws-subnet-id:,gcp-project-id:,gcp-account-file:,gcp-zone:,azure-secrets-file:,azure-resource-group:,azure-location:,azure-sku:,azure-cloud-environment:,ssh_user:,domain:,resolver:,reposuffix:,public-key-file:,debug \
-- "" "$@")
if [ $? -ne 0 ]; then
exit 1
--azure-resource-group)
AZURE_RESOURCE_GROUP="$2"; shift
;;
- --azure-storage-account)
- AZURE_STORAGE_ACCOUNT="$2"; shift
- ;;
--azure-location)
AZURE_LOCATION="$2"; shift
;;
if [[ "$AZURE_RESOURCE_GROUP" != "" ]]; then
EXTRA2+=" -var resource_group=$AZURE_RESOURCE_GROUP"
fi
-if [[ "$AZURE_STORAGE_ACCOUNT" != "" ]]; then
- EXTRA2+=" -var storage_account=$AZURE_STORAGE_ACCOUNT"
-fi
if [[ "$AZURE_LOCATION" != "" ]]; then
EXTRA2+=" -var location=$AZURE_LOCATION"
fi
--- /dev/null
+#!/bin/sh
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+if test -z "$1" ; then
+ echo "$0: Copies Arvados tutorial resources from public data cluster (jutro)"
+ echo "Usage: copy-tutorial.sh <dest>"
+ echo "<dest> is destination cluster configuration that can be found in ~/.config/arvados"
+ exit
+fi
+
+echo "Copying from public data cluster (jutro) to $1"
+
+for a in $(cat $HOME/.config/arvados/$1.conf) ; do export $a ; done
+
+project_uuid=$(arv --format=uuid group create --group '{"name":"User guide resources", "group_class": "project"}')
+
+# Bwa-mem workflow
+arv-copy --src jutro --dst $1 --project-uuid=$project_uuid f141fc27e7cfa7f7b6d208df5e0ee01b+59
+arv-copy --src jutro --dst $1 --project-uuid=$project_uuid jutro-7fd4e-mkmmq53m1ze6apx
+
+echo "Data copied to \"User guide resources\" at $project_uuid"