From: Peter Amstutz Date: Tue, 22 Sep 2020 19:57:34 +0000 (-0400) Subject: Merge branch 'master' into 16811-public-favs X-Git-Tag: 2.1.0~44^2~1 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/7499f61a2912cfdb1a316808fafa6e6ee77ee2e0?hp=a13547aec78a75da2174e083f6015280787cd597 Merge branch 'master' into 16811-public-favs Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- diff --git a/apps/workbench/app/controllers/projects_controller.rb b/apps/workbench/app/controllers/projects_controller.rb index 66dc3dcea2..e448e1b453 100644 --- a/apps/workbench/app/controllers/projects_controller.rb +++ b/apps/workbench/app/controllers/projects_controller.rb @@ -133,7 +133,7 @@ class ProjectsController < ApplicationController def remove_items @removed_uuids = [] params[:item_uuids].collect { |uuid| ArvadosBase.find uuid }.each do |item| - if item.class == Collection or item.class == Group + if item.class == Collection or item.class == Group or item.class == Workflow or item.class == ContainerRequest # Use delete API on collections and projects/groups item.destroy @removed_uuids << item.uuid diff --git a/apps/workbench/app/views/work_units/_show_component.html.erb b/apps/workbench/app/views/work_units/_show_component.html.erb index cac263d1ec..4cce090a22 100644 --- a/apps/workbench/app/views/work_units/_show_component.html.erb +++ b/apps/workbench/app/views/work_units/_show_component.html.erb @@ -45,13 +45,13 @@ SPDX-License-Identifier: AGPL-3.0 %>
<% if wu.runtime_status[:errorDetail] %> -
<%= sanitize(wu.runtime_status[:errorDetail]) %>
+
<%= h(wu.runtime_status[:errorDetail]) %>
<% else %> No detailed information available. <% end %> @@ -69,13 +69,13 @@ SPDX-License-Identifier: AGPL-3.0 %>
<% if wu.runtime_status[:warningDetail] %> -
<%= sanitize(wu.runtime_status[:warningDetail]) %>
+
<%= h(wu.runtime_status[:warningDetail]) %>
<% else %> No detailed information available. <% end %> diff --git a/build/run-library.sh b/build/run-library.sh index 528d69d998..df551455c1 100755 --- a/build/run-library.sh +++ b/build/run-library.sh @@ -704,6 +704,8 @@ fpm_build_virtualenv () { COMMAND_ARR+=(".") + debug_echo -e "\n${COMMAND_ARR[@]}\n" + FPM_RESULTS=$("${COMMAND_ARR[@]}") FPM_EXIT_CODE=$? @@ -827,13 +829,13 @@ fpm_build () { COMMAND_ARR+=('--exclude' "$i") done + COMMAND_ARR+=("${fpm_args[@]}") + # Append remaining function arguments directly to fpm's command line. for i; do COMMAND_ARR+=("$i") done - COMMAND_ARR+=("${fpm_args[@]}") - COMMAND_ARR+=("$PACKAGE") debug_echo -e "\n${COMMAND_ARR[@]}\n" diff --git a/build/run-tests.sh b/build/run-tests.sh index 6359fff1de..595f721080 100755 --- a/build/run-tests.sh +++ b/build/run-tests.sh @@ -88,7 +88,7 @@ lib/cloud/cloudtest lib/dispatchcloud lib/dispatchcloud/container lib/dispatchcloud/scheduler -lib/dispatchcloud/ssh_executor +lib/dispatchcloud/sshexecutor lib/dispatchcloud/worker lib/mount lib/pam @@ -650,6 +650,7 @@ install_env() { . "$VENV3DIR/bin/activate" # Needed for run_test_server.py which is used by certain (non-Python) tests. + # pdoc3 needed to generate the Python SDK documentation. ( set -e "${VENV3DIR}/bin/pip3" install wheel @@ -660,6 +661,7 @@ install_env() { "${VENV3DIR}/bin/pip3" install ciso8601 "${VENV3DIR}/bin/pip3" install pycurl "${VENV3DIR}/bin/pip3" install ws4py + "${VENV3DIR}/bin/pip3" install pdoc3 cd "$WORKSPACE/sdk/python" python3 setup.py install ) || fatal "installing PyYAML and sdk/python failed" @@ -707,7 +709,7 @@ do_test() { stop_services check_arvados_config "$1" ;; - gofmt | doc | lib/cli | lib/cloud/azure | lib/cloud/ec2 | lib/cloud/cloudtest | lib/cmd | lib/dispatchcloud/ssh_executor | lib/dispatchcloud/worker) + gofmt | doc | lib/cli | lib/cloud/azure | lib/cloud/ec2 | lib/cloud/cloudtest | lib/cmd | lib/dispatchcloud/sshexecutor | lib/dispatchcloud/worker) check_arvados_config "$1" # don't care whether services are running ;; diff --git a/doc/Gemfile.lock b/doc/Gemfile.lock index 344a0a86b5..b5e62cacd6 100644 --- a/doc/Gemfile.lock +++ b/doc/Gemfile.lock @@ -1,28 +1,23 @@ GEM remote: https://rubygems.org/ specs: - RedCloth (4.2.9) - coderay (1.1.0) - colorize (0.6.0) - kramdown (1.3.1) - less (1.2.21) - mutter (>= 0.4.2) - treetop (>= 1.4.2) - liquid (2.6.1) - makerakeworkwell (1.0.3) - rake (>= 0.9.2, < 11) - mutter (0.5.3) - polyglot (0.3.3) - rake (10.1.1) - treetop (1.4.15) - polyglot - polyglot (>= 0.3.1) - zenweb (3.3.1) + RedCloth (4.3.2) + coderay (1.1.3) + colorize (0.8.1) + commonjs (0.2.7) + kramdown (1.17.0) + less (2.6.0) + commonjs (~> 0.2.7) + liquid (4.0.3) + makerakeworkwell (1.0.4) + rake (>= 0.9.2, < 15) + rake (13.0.1) + zenweb (3.10.4) coderay (~> 1.0) - kramdown (~> 1.0) - less (~> 1.2) + kramdown (~> 1.4) + less (~> 2.0) makerakeworkwell (~> 1.0) - rake (>= 0.9, < 11) + rake (>= 0.9, < 15) PLATFORMS ruby @@ -32,3 +27,6 @@ DEPENDENCIES colorize liquid zenweb + +BUNDLED WITH + 2.1.4 diff --git a/doc/README.textile b/doc/README.textile index 75a30e9ef2..85757980a7 100644 --- a/doc/README.textile +++ b/doc/README.textile @@ -13,20 +13,28 @@ Additional information is available on the "'Documentation' page on the Arvados h2. Install dependencies
+arvados/doc$ sudo apt-get install build-essential libcurl4-openssl-dev libgnutls28-dev libssl-dev
 arvados/doc$ bundle install
-arvados/doc$ pip install epydoc
+
+ +To generate the Python SDK documentation, these additional dependencies are needed: + +
+arvados/doc$ sudo apt-get install python3-pip
+arvados/doc$ pip3 install arvados-python-client
+arvados/doc$ pip3 install pdoc3
 
h2. Generate HTML pages
-arvados/doc$ rake
+arvados/doc$ bundle exec rake
 
Alternately, to make the documentation browsable on the local filesystem:
-arvados/doc$ rake generate baseurl=$PWD/.site
+arvados/doc$ bundle exec rake generate baseurl=$PWD/.site
 
h2. Run linkchecker @@ -35,7 +43,7 @@ If you have "Linkchecker":http://wummel.github.io/linkchecker/ installed on your system, you can run it against the documentation:
-arvados/doc$ rake linkchecker baseurl=file://$PWD/.site
+arvados/doc$ bundle exec rake linkchecker baseurl=file://$PWD/.site
 
Please note that this will regenerate your $PWD/.site directory. @@ -43,7 +51,7 @@ Please note that this will regenerate your $PWD/.site directory. h2. Preview HTML pages
-arvados/doc$ rake run
+arvados/doc$ bundle exec rake run
 [2014-03-10 09:03:41] INFO  WEBrick 1.3.1
 [2014-03-10 09:03:41] INFO  ruby 2.1.1 (2014-02-24) [x86_64-linux]
 [2014-03-10 09:03:41] INFO  WEBrick::HTTPServer#start: pid=8926 port=8000
@@ -58,7 +66,7 @@ h2. Publish HTML pages inside Workbench
 You can set @baseurl@ (the URL prefix for all internal links), @arvados_cluster_uuid@, @arvados_api_host@ and @arvados_workbench_host@ without changing @_config.yml@:
 
 
-arvados/doc$ rake generate baseurl=/doc arvados_api_host=xyzzy.arvadosapi.com
+arvados/doc$ bundle exec rake generate baseurl=/doc arvados_api_host=xyzzy.arvadosapi.com
 
Make the docs appear at {workbench_host}/doc by creating a symbolic link in Workbench's @public@ directory, pointing to the generated HTML tree. @@ -70,5 +78,5 @@ arvados/doc$ ln -sn ../../../doc/.site ../apps/workbench/public/doc h2. Delete generated files
-arvados/doc$ rake realclean
+arvados/doc$ bundle exec rake realclean
 
diff --git a/doc/Rakefile b/doc/Rakefile index 623dbd033b..f7050dc41f 100644 --- a/doc/Rakefile +++ b/doc/Rakefile @@ -35,12 +35,12 @@ file "sdk/python/arvados/index.html" do |t| if ENV['NO_SDK'] || File.exists?("no-sdk") next end - `which epydoc` + `which pdoc` if $? == 0 - STDERR.puts `epydoc --html --parse-only -o sdk/python/arvados ../sdk/python/arvados/ 2>&1` + STDERR.puts `pdoc --html -o sdk/python ../sdk/python/arvados/ 2>&1` raise if $? != 0 else - puts "Warning: epydoc not found, Python documentation will not be generated".colorize(:light_red) + puts "Warning: pdoc3 not found, Python documentation will not be generated".colorize(:light_red) end end diff --git a/doc/_config.yml b/doc/_config.yml index a85576e450..44f8112c14 100644 --- a/doc/_config.yml +++ b/doc/_config.yml @@ -24,6 +24,8 @@ navbar: - Welcome: - user/index.html.textile.liquid - user/getting_started/community.html.textile.liquid + - Walkthough: + - user/tutorials/wgs-tutorial.html.textile.liquid - Run a workflow using Workbench: - user/getting_started/workbench.html.textile.liquid - user/tutorials/tutorial-workflow-workbench.html.textile.liquid diff --git a/doc/_includes/_install_ruby_and_bundler.liquid b/doc/_includes/_install_ruby_and_bundler.liquid index d14e555f89..d1d33cbbe3 100644 --- a/doc/_includes/_install_ruby_and_bundler.liquid +++ b/doc/_includes/_install_ruby_and_bundler.liquid @@ -4,7 +4,7 @@ Copyright (C) The Arvados Authors. All rights reserved. SPDX-License-Identifier: CC-BY-SA-3.0 {% endcomment %} -Minimum of Ruby 2.3 is required. Ruby 2.5 is recommended. +Ruby 2.5 or newer is required. * "Option 1: Install from packages":#packages * "Option 2: Install with RVM":#rvm @@ -13,16 +13,18 @@ Minimum of Ruby 2.3 is required. Ruby 2.5 is recommended. h2(#packages). Option 1: Install from packages {% include 'notebox_begin' %} -Future versions of Arvados may require a newer version of Ruby than is packaged with your OS. Using OS packages simplifies initial install, but may complicate upgrades that rely on a newer Ruby. If this is a concern, we recommend using "RVM.":#rvm +Future versions of Arvados may require a newer version of Ruby than is packaged with your OS. Using OS packages simplifies initial install, but may complicate upgrades that rely on a newer Ruby. If this is a concern, we recommend using "RVM":#rvm. {% include 'notebox_end' %} h3. Centos 7 -The Ruby version shipped with Centos 7 is too old. Use "RVM.":#rvm +The Ruby version shipped with Centos 7 is too old. Use "RVM":#rvm to install Ruby 2.5 or later. h3. Debian and Ubuntu -Debian 9 (stretch) and Ubuntu 16.04 (xenial) ship Ruby 2.3, which is sufficient to run Arvados. Later releases have newer versions of Ruby that can also run Arvados. +Debian 9 (stretch) and Ubuntu 16.04 (xenial) ship Ruby 2.3, which is not supported by Arvados. Use "RVM":#rvm to install Ruby 2.5 or later. + +Debian 10 (buster) and Ubuntu 18.04 (bionic) and later ship with Ruby 2.5, which is supported by Arvados.
# apt-get --no-install-recommends install ruby ruby-dev bundler
diff --git a/doc/_layouts/default.html.liquid b/doc/_layouts/default.html.liquid index 2ce354f060..db6c00bc3e 100644 --- a/doc/_layouts/default.html.liquid +++ b/doc/_layouts/default.html.liquid @@ -22,57 +22,10 @@ SPDX-License-Identifier: CC-BY-SA-3.0 + - - -

User Guide — How to manage data and do analysis with Arvados.

diff --git a/doc/sdk/python/sdk-python.html.textile.liquid b/doc/sdk/python/sdk-python.html.textile.liquid index fa7c36c24b..2915d554d9 100644 --- a/doc/sdk/python/sdk-python.html.textile.liquid +++ b/doc/sdk/python/sdk-python.html.textile.liquid @@ -18,7 +18,7 @@ If you are logged in to an Arvados VM, the Python SDK should be installed. To use the Python SDK elsewhere, you can install from PyPI or a distribution package. -The Python SDK supports Python 2.7 and 3.4+ +As of Arvados 2.1, the Python SDK requires Python 3.5+. The last version to support Python 2.7 is Arvados 2.0.4. h2. Option 1: Install from a distribution package @@ -26,7 +26,7 @@ This installation method is recommended to make the CLI tools available system-w First, configure the "Arvados package repositories":../../install/packages.html -{% assign arvados_component = 'python-arvados-python-client' %} +{% assign arvados_component = 'python3-arvados-python-client' %} {% include 'install_packages' %} @@ -60,8 +60,8 @@ If you installed with pip (option 1, above):
~$ python
-Python 2.7.4 (default, Sep 26 2013, 03:20:26)
-[GCC 4.7.3] on linux2
+Python 3.7.3 (default, Jul 25 2020, 13:03:44)
+[GCC 8.3.0] on linux
 Type "help", "copyright", "credits" or "license" for more information.
 >>> import arvados
 >>> arvados.api('v1')
@@ -74,8 +74,8 @@ If you installed from a distribution package (option 2): the package includes a
 
 
~$ source /usr/share/python2.7/dist/python-arvados-python-client/bin/activate
 (python-arvados-python-client) ~$ python
-Python 2.7.4 (default, Sep 26 2013, 03:20:26)
-[GCC 4.7.3] on linux2
+Python 3.7.3 (default, Jul 25 2020, 13:03:44)
+[GCC 8.3.0] on linux
 Type "help", "copyright", "credits" or "license" for more information.
 >>> import arvados
 >>> arvados.api('v1')
@@ -87,8 +87,8 @@ Or alternatively, by using the Python executable from the virtualenv directly:
 
 
 
~$ /usr/share/python2.7/dist/python-arvados-python-client/bin/python
-Python 2.7.4 (default, Sep 26 2013, 03:20:26)
-[GCC 4.7.3] on linux2
+Python 3.7.3 (default, Jul 25 2020, 13:03:44)
+[GCC 8.3.0] on linux
 Type "help", "copyright", "credits" or "license" for more information.
 >>> import arvados
 >>> arvados.api('v1')
diff --git a/doc/user/index.html.textile.liquid b/doc/user/index.html.textile.liquid
index 4b0a443d3c..e24afc9a44 100644
--- a/doc/user/index.html.textile.liquid
+++ b/doc/user/index.html.textile.liquid
@@ -1,7 +1,7 @@
 ---
 layout: default
 navsection: userguide
-title: Welcome to Arvados!
+title: Welcome to Arvados™!
 ...
 {% comment %}
 Copyright (C) The Arvados Authors. All rights reserved.
@@ -9,12 +9,9 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-Arvados is an open source platform for managing, processing, and sharing genomic and other large scientific and biomedical data.  This guide provides a reference for using Arvados to solve scientific big data problems, including:
+Arvados is an "open source":copying/copying.html platform for managing, processing, and sharing genomic and other large scientific and biomedical data.  With Arvados, bioinformaticians run and scale compute-intensive workflows, developers create biomedical applications, and IT administrators manage large compute and storage resources.
 
-* Robust storage of very large files, such as whole genome sequences, using the "Arvados Keep":{{site.baseurl}}/user/tutorials/tutorial-keep.html content-addressable cluster file system.
-* Running compute-intensive scientific analysis pipelines, such as genomic alignment and variant calls using the "Arvados Crunch":{{site.baseurl}}/user/tutorials/intro-crunch.html cluster compute engine.
-* Accessing, organizing, and sharing data, workflows and results using the "Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html web application.
-* Running an analysis using multiple clusters (HPC, cloud, or hybrid) with "Federated Multi-Cluster Workflows":{{site.baseurl}}/user/cwl/federated-workflows.html .
+This guide provides a reference for using Arvados to solve scientific big data problems.
 
 The examples in this guide use the Arvados instance located at {{site.arvados_workbench_host}}.  If you are using a different Arvados instance replace @{{ site.arvados_workbench_host }}@ with your private instance in all of the examples in this guide.
 
diff --git a/doc/user/tutorials/wgs-tutorial.html.textile.liquid b/doc/user/tutorials/wgs-tutorial.html.textile.liquid
new file mode 100644
index 0000000000..cd4d1cc715
--- /dev/null
+++ b/doc/user/tutorials/wgs-tutorial.html.textile.liquid
@@ -0,0 +1,357 @@
+---
+layout: default
+navsection: userguide
+title: "Processing Whole Genome Sequences"
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+
+
+h2. 1. A Brief Introduction to Arvados
+
+Arvados is an open source platform for managing, processing, and sharing genomic and other large scientific and biomedical data. Arvados helps bioinformaticians run and scale compute-intensive workflows. By running their workflows in Arvados, they can scale their calculations dynamically in the cloud, track methods and datasets, and easily re-run workflow steps or whole workflows when necessary. This tutorial walkthrough shows examples of running a “real-world” workflow and of navigating and using the Arvados working environment.
+
+When you log into your account on the Arvados Playground ("https://playground.arvados.org":https://playground.arvados.org), you see the Arvados Workbench, the web application that allows users to interactively access Arvados functionality. For this tutorial, we will largely focus on using the Arvados Workbench, since that is an easy way to get started using Arvados. You can also access Arvados via your command line and/or using the available REST API and SDKs. If you are interested, this tutorial walkthrough has an optional component that covers using the command line.
+
+By using the Arvados Workbench or the command line, you can submit your workflows to run on your Arvados cluster. An Arvados cluster can be hosted in the cloud, on premises, or on a hybrid of the two. The Arvados Playground cluster is currently hosted in the cloud.
+
+You can also use the Workbench or the command line to access data in the Arvados storage system, called Keep, which is designed for managing and storing large collections of files on your Arvados cluster. The running of workflows is managed by Crunch. Crunch is designed to maintain data provenance and workflow reproducibility. Crunch automatically tracks data inputs and outputs through Keep and executes workflow processes in Docker containers. In a cloud environment, Crunch optimizes costs by scaling compute on demand.
+
+_Ways to Learn More About Arvados_
+* To learn more in general about Arvados, please visit the Arvados website here: "https://arvados.org/":https://arvados.org/
+* For a deeper dive into Arvados, the Arvados documentation can be found here: "https://doc.arvados.org/":https://doc.arvados.org/
+* For help on Arvados, visit the Gitter channel here: "https://gitter.im/arvados/community":https://gitter.im/arvados/community
+
+
+h2. 2. A Brief Introduction to the Whole Genome Sequencing (WGS) Processing Tutorial
+
+The workflow used in this tutorial walkthrough serves as a “real-world” workflow example that takes in WGS data (paired FASTQs) and returns GVCFs and accompanying variant reports. In this walkthrough, we will be processing approximately 10 public genomes made available by the Personal Genome Project. This set of data is from PGP-UK ("https://www.personalgenomes.org.uk/":https://www.personalgenomes.org.uk/).
+
+The overall steps in the workflow include:
+* Check of FASTQ quality using FastQC ("https://www.bioinformatics.babraham.ac.uk/projects/fastqc/":https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+* Local alignment using BWA-MEM ("http://bio-bwa.sourceforge.net/bwa.shtml":http://bio-bwa.sourceforge.net/bwa.shtml)
+* Variant calling in parallel using GATK Haplotype Caller ("https://gatk.broadinstitute.org/hc/en-us":https://gatk.broadinstitute.org/hc/en-us)
+* Generation of an HTML report comparing variants against the ClinVar archive ("https://www.ncbi.nlm.nih.gov/clinvar/":https://www.ncbi.nlm.nih.gov/clinvar/)
+
+The workflow is written in "Common Workflow Language":https://commonwl.org (CWL), the primary way to develop and run workflows for Arvados.
+
+Below are diagrams of the main workflow, which runs the processing across multiple sets of FASTQs, and the main subworkflow (run multiple times in parallel by the main workflow), which processes a single set of FASTQs. This main subworkflow also calls other additional subworkflows, including subworkflows that perform variant calling using GATK in parallel by region and generate the ClinVar HTML variant report. These CWL diagrams (generated using "CWL viewer":https://view.commonwl.org) will give you a basic idea of the flow, inputs/outputs, and workflow steps involved in the tutorial example. However, if you aren’t used to looking at CWL workflow diagrams and/or aren’t particularly interested in this level of detail, do not worry. You will not need to know these particulars to run the workflow.
+
+
!{width: 100%}{{ site.baseurl }}/images/wgs-tutorial/image2.png! +
_*Figure 1*: Main CWL Workflow for WGS Processing Tutorial. This runs the same WGS subworkflow over multiple pairs of FASTQ files._
+ +
!{width: 100%}{{ site.baseurl }}/images/wgs-tutorial/image3.png! +
_*Figure 2*: Main subworkflow for the WGS Processing Tutorial. This subworkflow does alignment, deduplication, variant calling and reporting._
+
+_Ways to Learn More About CWL_
+
+* The CWL website has lots of good content including the CWL User Guide: "https://www.commonwl.org/":https://www.commonwl.org/
+* Commonly Asked Questions and Answers can be found in the Discourse Group, here: "https://cwl.discourse.group/":https://cwl.discourse.group/
+* For help on CWL, visit the Gitter channel here: "https://gitter.im/common-workflow-language/common-workflow-language":https://gitter.im/common-workflow-language/common-workflow-language
+* Repository of CWL CommandLineTool descriptions for common tools in bioinformatics:
+"https://github.com/common-workflow-library/bio-cwl-tools/":https://github.com/common-workflow-library/bio-cwl-tools/
+
+
+h2. 3. Setting Up to Run the WGS Processing Workflow
+
+Let’s get a little familiar with the Arvados Workbench while also setting up to run the WGS processing tutorial workflow. Logging into the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in your Arvados instance, i.e. the Arvados Playground. The Dashboard will only give you information about projects and activities that you have permission to view and/or access. Other users' private or restricted projects and activities will not be visible by design.
+
+h3. 3a. Setting up a New Project
+
+Projects in Arvados help you organize and track your work, and can contain data, workflow code, details about workflow runs, and results. Let’s begin by setting up a new project for the work you will be doing in this walkthrough.
+
+To create a new project, go to the Projects dropdown menu and select “Add a New Project”.
+
+
!{width: 100%}{{ site.baseurl }}/images/wgs-tutorial/image4.png! +
_*Figure 3*: Adding a new project using Arvados Workbench._
+ +Let’s name your project “WGS Processing Tutorial”. You can also add a description of your project using the *Edit* button. The universally unique identifier (UUID) of the project can be found in the URL. + +
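+If you prefer the command line, the same kind of project can also be created with the Python SDK (installation and configuration are covered in Section 4c). This is a minimal, optional sketch rather than a required walkthrough step; it assumes @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ are set in your environment.
+
+import arvados
+
+# In the Arvados API, a project is a "group" with group_class "project".
+api = arvados.api('v1')
+project = api.groups().create(body={'group': {
+    'name': 'WGS Processing Tutorial',
+    'group_class': 'project',
+}}).execute()
+print(project['uuid'])  # the same UUID you would see in the Workbench URL
+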
!{width: 100%}{{ site.baseurl }}/images/wgs-tutorial/image6.png! +
_*Figure 4*: Renaming new project using Arvados Workbench. The UUID of the project can be found in the URL and is highlighted in yellow in this image for emphasis._
+
+If you choose to use another name for your project, just keep that in mind when the project name is referenced later in the walkthrough.
+
+h3. 3b. Working with Collections
+
+Collections in Arvados help organize and manage your data. You can upload your existing data into a collection or reuse data from one or more existing collections. Collections allow us to reorganize our files without duplicating or physically moving the data, making them very efficient to use even when working with terabytes of data. Each collection has a universally unique identifier (collection UUID). This is a constant for the collection, even if you add or remove files, or rename the collection. You use this UUID when you want to identify the most recent version of your collection to use in your workflows.
+
+Arvados uses a content-addressable filesystem (i.e. Keep) where the addresses of files are derived from their contents. A major benefit of this is that Arvados can then verify, when a dataset is retrieved, that it is the dataset you requested, and can track the exact datasets that were used for each of your previous calculations. This is what allows you to be certain that you are always working with the data that you think you are using. You use the content address of a collection when you want to guarantee that you use the same version as input to your workflow. The sketch below shows both ways of fetching a collection.
+
+
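+To make the UUID/content-address distinction concrete, here is a minimal, optional Python SDK sketch (the collection UUID is a placeholder; it assumes the SDK is installed and configured as described in Section 4c):
+
+import arvados
+
+api = arvados.api('v1')
+
+# By UUID: always resolves to the current contents of the named collection.
+c = api.collections().get(uuid='zzzzz-4zz18-xxxxxxxxxxxxxxx').execute()
+
+# By content address (portable data hash): pins one exact set of file
+# contents, no matter how the collection is later renamed or modified.
+pinned = api.collections().list(
+    filters=[['portable_data_hash', '=', c['portable_data_hash']]]).execute()
+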
!{width: 100%}{{ site.baseurl }}/images/wgs-tutorial/image1.png! +
_*Figure 5*: A collection in Arvados as viewed via the Arvados Workbench. On the upper left you will find a panel that contains: the name of the collection (editable), a description of the collection (editable), the collection UUID and the content address and content size._
+
+Let’s start working with collections by copying the existing collection that stores the FASTQ data being processed into our new “WGS Processing Tutorial” project.
+
+First, you must find the collection you are interested in copying over to your project. There are several ways to search for a collection: by collection name, by UUID, or by content address. In this case, let’s search for our collection by name.
+
+The collection is called “PGP UK FASTQs”; search for it in the “search this site” box, and it will come up so you can navigate to it. You would do the same if you wanted to search by UUID or content address.
+
+Now that you have found the collection of FASTQs you want to copy to your project, you can simply use the Copy to project... button and select your new project to copy the collection there. You can rename your collection whatever you wish, or use the default name on copy and add whatever description you would like.
+
+We want to do the same thing for the other inputs to our WGS workflow. Similar to the “PGP UK FASTQs” collection, there is a collection of inputs entitled “WGS Processing reference data”, and that collection can be copied over in a similar fashion.
+
+Now that we are a bit more familiar with the Arvados Workbench, projects, and collections, let’s move on to running a workflow.
+
+h2. 4. Running the WGS Processing Workflow
+
+In this section, we will be discussing three ways to run the tutorial workflow using Arvados. We will start with the easiest way and then progress to the more involved ways to run a workflow via the command line, which will allow you more control over your inputs, workflow parameters, and setup. Feel free to end your walkthrough after the first way, or to pick and choose the ways that appeal the most to you, fit your experience, and/or suit your preferred way of working.
+
+h3. 4a. Interactively Running a Workflow Using Workbench
+
+Workflows can be registered in Arvados. Registration allows you to share a workflow with other Arvados users, and lets them run the workflow by clicking the Run a process… button on the Workbench Dashboard, or on the command line by specifying the workflow UUID. Default values can be specified for workflow inputs.
+
+We have already registered the WGS workflow and set default input values for this part of the walkthrough.
+
+Let’s find the registered WGS Processing Workflow and run it interactively in our newly created project.
+
+# To find the registered workflow, you can search for it in the search box located in the top right corner of the Arvados Workbench by looking for the name “WGS Processing Workflow”.
+# Once you have found the registered workflow, you can run it in your project by using the Run this workflow... button and selecting your project ("WGS Processing Tutorial") that you set up in Section 3a.
+# Default inputs to the registered workflow will be automatically filled in. These inputs will still work. You can verify this by checking the addresses of the collections you copied over to your new project.
+# The input *Directory of paired FASTQ files* will need to be set. Click on the Choose button, select "PGP UK FASTQs" in the *Choose a dataset* dialog, and then click OK.
+# Now, you can submit your workflow by scrolling to the bottom of the page and hitting the Run button.
+
+Congratulations! You have now submitted your workflow to run.
You can move to Section 5 to learn how to check the state of your submitted workflow, and Section 6 to learn how to examine the results of and logs from your workflow.
+
+Let’s now say that instead of running a registered workflow you want to run a workflow using the command line. This is a completely optional step in the walkthrough. To do this, you specify cwl files to define the workflow you want to run and yml files to specify the inputs to your workflow. In this walkthrough we will give two options, (4b) and (4c), for running the workflow on the command line. Option 4b uses a virtual machine provided by Arvados, made accessible via a browser, that requires no additional setup. Option 4c allows you to submit from your personal machine, but you must install the necessary packages and edit configurations to allow you to submit to the Arvados cluster. Please choose whichever works best for you.
+
+h3. 4b. Optional: Setting up to Run a Workflow Using Command Line and an Arvados Virtual Machine
+
+Arvados provides a virtual machine which has all the necessary client-side libraries installed to submit to your Arvados cluster using the command line. Webshell gives you access to an Arvados Virtual Machine (VM) from your browser with no additional setup. You can access webshell through the Arvados Workbench. It is the easiest way to try out submitting a workflow to Arvados via the command line.
+
+New users of the Playground are automatically given access to a shell account.
+
+_Note_: the shell accounts are created on an interval, and it may take up to two minutes from your initial log in before the shell account is created.
+
+You can follow the instructions here to access the machine using the browser (also known as using webshell):
+* "Accessing an Arvados VM with Webshell":{{ site.baseurl }}/user/getting_started/vm-login-with-webshell.html
+
+Arvados also allows you to ssh into the shell machine and other hosted VMs instead of using the webshell capabilities. However, this tutorial does not cover that option in depth. If you would like to explore it on your own, you can follow the instructions in the documentation here:
+* "Accessing an Arvados VM with SSH - Unix Environments":{{ site.baseurl }}/user/getting_started/ssh-access-unix.html
+* "Accessing an Arvados VM with SSH - Windows Environments":{{ site.baseurl }}/user/getting_started/ssh-access-windows.html
+
+Once you can use webshell, you can proceed to section *“4d. Running a Workflow Using the Command Line”*.
+
+h3. 4c. Optional: Setting up to Run a Workflow Using Command Line and Your Computer
+
+Instead of using a virtual machine provided by Arvados, you can install the necessary libraries and configure your computer to be able to submit to your Arvados cluster directly. This is more of an advanced option and is for users who are comfortable installing software and libraries and configuring them on their machines.
+
+To be able to submit workflows to the Arvados cluster, you will need to install the Python SDK on your machine. Additional features can be made available by installing additional libraries, but this is the bare minimum you need to install to do this walkthrough tutorial. You can follow the instructions in the Arvados documentation to install the Python SDK and set the appropriate configurations to access the Arvados Playground.
+ +* "Installing the Arvados CWL Runner":{{ site.baseurl }}/sdk/python/arvados-cwl-runner.html +* "Setting Configurations to Access the Arvados Playground":{{ site.baseurl }}/user/reference/api-tokens.html + +Once you have your machine set up to submit to the Arvados Playground Cluster, you can proceed to section *“4d. Running a Workflow Using the Command Line”* . + +h3. 4d. Optional: Running a Workflow Using the Command Line + +Now that we have access to a machine that can submit to the Arvados Playground, let’s download the relevant files containing the workflow description and inputs. + +First, we will +* Clone the tutorial repository from GitHub ("https://github.com/arvados/arvados-tutorial":https://github.com/arvados/arvados-tutorial) +* Change directories into the WGS tutorial folder + +
$ git clone https://github.com/arvados/arvados-tutorial.git
+$ cd arvados-tutorial/WGS-processing
+
+
+Recall that CWL is a way to describe command line tools and connect them together to create workflows. YML files can be used to specify input values for these individual command line tools or for overarching workflows.
+
+The tutorial directories are as follows:
+* @cwl@ - contains CWL descriptions of workflows and command line tools for the tutorial
+* @yml@ - contains YML files of inputs for the main workflow and for testing subworkflows and command line tools
+* @src@ - contains any source code necessary for the tutorial
+* @docker@ - contains Dockerfiles necessary to re-create any needed Docker images used in the tutorial
+
+Before we run the WGS processing workflow, we want to adjust the inputs to match those in your new project. The workflow that we want to submit is described by the file @cwl/wgs-processing-wf.cwl@ and the inputs are given by the file @yml/wgs-processing-wf.yml@. Note: while all the cwl files are needed to describe the full workflow, only the single yml file with the workflow inputs is needed to run the workflow. The additional yml files (in the helper folder) are provided for testing purposes, or in case one wants to test or run an underlying subworkflow or the cwl for a command line tool by itself.
+
+Several of the inputs in the yml file point to the original content addresses of the collections that you made copies of in your new project. These still work because, even though we made copies of the collections into our new project, we haven’t changed the underlying contents. Editing this yml file is, in general, how you would alter the inputs for a given workflow.
+
+The command to submit to the Arvados Playground Cluster is @arvados-cwl-runner@.
+To submit the WGS processing workflow, you need to run the following command, replacing YOUR_PROJECT_UUID with the UUID of the new project you created for this tutorial.
+
+
$ arvados-cwl-runner --no-wait --project-uuid YOUR_PROJECT_UUID ./cwl/wgs-processing-wf.cwl ./yml/wgs-processing-wf.yml
+
+
+The @--no-wait@ option will submit the workflow to Arvados, print out the UUID of the job that was submitted to standard output, and exit instead of waiting until the job is finished to return the command prompt.
+
+The @--project-uuid@ option specifies the project you want the workflow to run in; this means the outputs and log collections, as well as the workflow process, will be saved in that project.
+
+If the workflow submitted successfully, you should see the following at the end of the output to the screen:
+
+
INFO Final process status is success
+
+
+Now, you are ready to check the state of your submitted workflow.
+
+h2. 5. Checking the State of a Submitted Workflow
+
+Once you have submitted your workflow, you can examine its state interactively using the Arvados Workbench. If you aren’t already viewing your workflow process on the Workbench, there are several ways to get to your submitted workflow. Here are two of the simplest:
+
+* Via the Dashboard: It should be listed at the top of the list of “Recent Processes”. Just click on the name of your submitted workflow and it will take you to the submitted workflow information.
+* Via Your Project: You will want to go back to your new project, using the Projects pulldown menu or searching for the project name. Note: You can mark a Project as a favorite (if/when you have multiple Projects) to make it easier to find on the pulldown menu, using the star next to the project name on the project page.
+
+The process you will be looking for will be titled “WGS processing workflow scattered over samples” (if you submitted via the command line) or NAME OF REGISTERED WORKFLOW container (if you submitted via the Registered Workflow).
+
+Once you have found your workflow, you can see the state of the overall workflow, and of the underlying steps below it, by their labels.
+
+Common states you will see are as follows:
+
+* Queued - Workflow or step is waiting to run
+* Running or Active - Workflow is currently running
+* Complete - Workflow or step has successfully completed
+* Failing - Workflow is running but has steps that have failed
+* Failed - Workflow or step did not complete successfully
+* Cancelled - Workflow or step was either manually cancelled or was cancelled by Arvados due to a system error
+
+Because Arvados Crunch reuses steps and workflows when possible, this workflow should run relatively quickly: it has been run before, and you have access to those previously run steps. You may notice an initial period where the top level job shows the option of canceling while the other steps are filled in with already finished steps.
+
+h2. 6. Examining a Finished Workflow
+
+Once your workflow has finished, you can see how long it took the workflow to run, see scaling information, and examine the logs and outputs. Outputs will only be available for steps that have been successfully completed. Outputs are saved for every step in the workflow and for the workflow itself. Outputs are saved in collections. You can access each collection by clicking on the link corresponding to the output. If you are working from the command line, you can also query this information with the Python SDK, as shown in the sketch below.
+
+
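+A minimal sketch of that query; the name filter assumes the command-line submission from Section 4d, so adjust it to match your own process, and treat this as an optional illustration rather than a required walkthrough step:
+
+import arvados
+
+api = arvados.api('v1')
+# Find the most recent container request for our workflow.
+items = api.container_requests().list(
+    filters=[['name', 'like', 'WGS processing workflow%']],
+    order=['created_at desc'], limit=1).execute()['items']
+if items:
+    cr = items[0]
+    container = api.containers().get(uuid=cr['container_uuid']).execute()
+    # container['state'] is e.g. Queued, Locked, Running, Complete, Cancelled;
+    # cr['output_uuid'] names the output collection once the workflow finishes.
+    print(cr['name'], container['state'], cr['output_uuid'])
+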
!{width: 100%}{{ site.baseurl }}/images/wgs-tutorial/image5.png! +
_*Figure 6*: A completed workflow process in Arvados as viewed via the Arvados Workbench. You can click on the outputs link (highlighted in yellow) to view the outputs. Outputs of a workflow are stored in a collection._
+
+If we click on the outputs of the workflow, we will see the output collection.
+
+Contained in this collection are the GVCF, tabix index file, and HTML ClinVar report for each analyzed sample (e.g. set of FASTQs). By clicking on the download button to the right of a file, you can download it to your local machine. You can also use the command line to download single files or whole collections to your machine, as in the sketch after the log summary below. You can examine the outputs of a step similarly by using the arrow to expand the panel to see more details.
+
+Logs for the main process can be found in the Log tab. There are several logs available, so here is a basic summary of what some of the more commonly used logs contain. Let's first define a few terms that will help us understand what the logs are tracking.
+
+As you may recall, Arvados Crunch manages the running of workflows. A _container request_ is an order sent to Arvados Crunch to perform some computational work. Crunch fulfils a request by either choosing a worker node to execute a container, or finding an identical/equivalent container that has already run. You can use _container request_ or _container_ to distinguish between a work order that is submitted to be run and a work order that is actually running or has been run. So our container request in this case is just the submitted workflow we sent to the Arvados cluster.
+
+A _node_ is a compute resource where Arvados can schedule work. In our case, since the Arvados Playground is running on a cloud, our nodes are virtual machines. @arvados-cwl-runner@ (acr) executes CWL workflows by submitting the individual parts to Arvados as containers, and crunch-run is an internal component that runs on nodes and executes containers.
+
+* @stderr.txt@
+** Captures everything written to standard error by the programs run by the executing container
+* @node-info.txt@ and @node.json@
+** Contain information about the nodes that executed this container. For the Arvados Playground, this gives information about the virtual machine instance that ran the container.
+@node.json@ gives a high-level overview of the instance, such as name, price, and RAM, while @node-info.txt@ gives more detailed information about the virtual machine (e.g. the cpu of each processor)
+* @crunch-run.txt@ and @crunchstat.txt@
+** @crunch-run.txt@ has info about how the container's execution environment was set up (e.g., time spent loading the docker image) and timing/results of copying output data to Keep (if applicable)
+** @crunchstat.txt@ has info about resource consumption (RAM, cpu, disk, network) by the container while it was running.
+* @container.json@
+** Describes the container (unit of work to be done); contains CWL code and runtime constraints (RAM, vcpus), amongst other details
+* @arv-mount.txt@
+** Contains information about using Arvados Keep on the node executing the container
+* @hoststat.txt@
+** Contains information about resource consumption (RAM, cpu, disk, network) on the node while it was running.
+This is different from @crunchstat.txt@ because it includes resource consumption of Arvados components that run on the node outside the container, such as crunch-run and other processes related to the Keep file system.
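+
+These log files (and any other collection contents) can also be read or downloaded programmatically. A minimal Python SDK sketch; the collection UUID is a placeholder for the log collection linked from the process page, and @crunch-run.txt@ is one of the files listed above:
+
+import arvados
+import arvados.collection
+
+# Open a process's log collection and print one of its log files.
+log = arvados.collection.Collection('zzzzz-4zz18-xxxxxxxxxxxxxxx',
+                                    api_client=arvados.api('v1'))
+with log.open('crunch-run.txt') as f:
+    print(f.read())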
+
+The highest level logs track the container that ran the @arvados-cwl-runner@ process, which you can think of as the “mastermind” behind tracking which parts of the CWL workflow need to be run when, which have been run already, in what order they need to be run, and which can be run simultaneously, and then sending out the related container requests. Each step then has its own logs related to the container running that CWL step of the workflow, including a log of standard error that contains the standard error of the code run in that CWL step. Those logs can be found by expanding the steps and clicking on the link to the log collection.
+
+Let’s take a peek at a few of these logs to get you more familiar with them. First, we can look at the @stderr.txt@ of the highest level process. Again, recall this is the log of the “mastermind” @arvados-cwl-runner@ process. You can click on the log to download it to your local machine, and when you look at the contents you should see something like the following:
+
+
2020-06-22T20:30:04.737703197Z INFO /usr/bin/arvados-cwl-runner 2.0.3, arvados-python-client 2.0.3, cwltool 1.0.20190831161204
+2020-06-22T20:30:04.743250012Z INFO Resolved '/var/lib/cwl/workflow.json#main' to 'file:///var/lib/cwl/workflow.json#main'
+2020-06-22T20:30:20.749884298Z INFO Using empty collection d41d8cd98f00b204e9800998ecf8427e+0
+[removing some log contents here for brevity]
+2020-06-22T20:30:35.629783939Z INFO Running inside container su92l-dz642-uaqhoebfh91zsfd
+2020-06-22T20:30:35.741778080Z INFO [workflow WGS processing workflow] start
+2020-06-22T20:30:35.741778080Z INFO [workflow WGS processing workflow] starting step getfastq
+2020-06-22T20:30:35.741778080Z INFO [step getfastq] start
+2020-06-22T20:30:36.085839313Z INFO [step getfastq] completed success
+2020-06-22T20:30:36.212789670Z INFO [workflow WGS processing workflow] starting step bwamem-gatk-report
+2020-06-22T20:30:36.213545871Z INFO [step bwamem-gatk-report] start
+2020-06-22T20:30:36.234224197Z INFO [workflow bwamem-gatk-report] start
+2020-06-22T20:30:36.234892498Z INFO [workflow bwamem-gatk-report] starting step fastqc
+2020-06-22T20:30:36.235154798Z INFO [step fastqc] start
+2020-06-22T20:30:36.237328201Z INFO Using empty collection d41d8cd98f00b204e9800998ecf8427e+0
+
+
+You can see the output of all the work that arvados-cwl-runner does by managing the execution of the CWL workflow and all the underlying steps and subworkflows.
+
+Now, let’s explore the logs for a step in the workflow. Remember that those logs can be found by expanding the steps and clicking on the link to the log collection. Let’s look at the log for the step that does the alignment. That step is named bwamem-samtools-view. We can see there are 10 of them because we are aligning 10 genomes. Let’s look at *bwamem-samtools-view2*.
+
+We click the arrow to open up the step, and then can click on the log collection to access the logs. You may notice there are two sets of seemingly identical logs: one listed under a directory named for a container, and one up in the main directory. This is done in case your step had to be automatically re-run due to any issues, and gives the logs of each re-run. The logs in the main directory are the logs for the successful run. In most cases a re-run does not happen; you will just see one directory, and those logs will match the logs in the main directory. Let’s open the logs labeled node-info.txt and stderr.txt.
+
+@node-info.txt@ gives us detailed information about the virtual machine this step was run on. The tail end of the log should look like the following:
+
+
Memory Information
+MemTotal:       64465820 kB
+MemFree:        61617620 kB
+MemAvailable:   62590172 kB
+Buffers:           15872 kB
+Cached:          1493300 kB
+SwapCached:            0 kB
+Active:          1070868 kB
+Inactive:        1314248 kB
+Active(anon):     873716 kB
+Inactive(anon):     8444 kB
+Active(file):     197152 kB
+Inactive(file):  1305804 kB
+Unevictable:           0 kB
+Mlocked:               0 kB
+SwapTotal:             0 kB
+SwapFree:              0 kB
+Dirty:               952 kB
+Writeback:             0 kB
+AnonPages:        874968 kB
+Mapped:           115352 kB
+Shmem:              8604 kB
+Slab:             251844 kB
+SReclaimable:     106580 kB
+SUnreclaim:       145264 kB
+KernelStack:        5584 kB
+PageTables:         3832 kB
+NFS_Unstable:          0 kB
+Bounce:                0 kB
+WritebackTmp:          0 kB
+CommitLimit:    32232908 kB
+Committed_AS:    2076668 kB
+VmallocTotal:   34359738367 kB
+VmallocUsed:           0 kB
+VmallocChunk:          0 kB
+Percpu:             5120 kB
+AnonHugePages:    743424 kB
+ShmemHugePages:        0 kB
+ShmemPmdMapped:        0 kB
+HugePages_Total:       0
+HugePages_Free:        0
+HugePages_Rsvd:        0
+HugePages_Surp:        0
+Hugepagesize:       2048 kB
+Hugetlb:               0 kB
+DirectMap4k:      155620 kB
+DirectMap2M:     6703104 kB
+DirectMap1G:    58720256 kB
+
+Disk Space
+Filesystem      1M-blocks  Used Available Use% Mounted on
+/dev/nvme1n1p1       7874  1678      5778  23% /
+/dev/mapper/tmp    381746  1496    380251   1% /tmp
+
+Disk INodes
+Filesystem         Inodes IUsed     IFree IUse% Mounted on
+/dev/nvme1n1p1     516096 42253    473843    9% /
+/dev/mapper/tmp 195549184 44418 195504766    1% /tmp
+
+
+We can see all the details of the virtual machine used for this step, including that it has 16 cores and 64 GiB of RAM.
+
+@stderr.txt@ gives us everything written to standard error by the programs run in this step. This step ran successfully, so we don’t need to use this to debug our step; we are just taking a look for practice.
+
+The tail end of our log should be similar to the following:
+
+
2020-08-04T04:37:19.674225566Z [main] CMD: /bwa-0.7.17/bwa mem -M -t 16 -R @RG\tID:sample\tSM:sample\tLB:sample\tPL:ILLUMINA\tPU:sample1 -c 250 /keep/18657d75efb4afd31a14bb204d073239+13611/GRCh38_no_alt_plus_hs38d1_analysis_set.fna /keep/a146a06222f9a66b7d141e078fc67660+376237/ERR2122554_1.fastq.gz /keep/a146a06222f9a66b7d141e078fc67660+376237/ERR2122554_2.fastq.gz
+2020-08-04T04:37:19.674225566Z [main] Real time: 35859.344 sec; CPU: 553120.701 sec
+
+
+This shows the command we ran to invoke bwa-mem, along with the scaling information for running bwa-mem multi-threaded across 16 cores (15.4x).
+
+We hope that now that you have a bit more familiarity with the logs, you can continue to use them to debug and optimize your own workflows as you move forward with using Arvados in your own work.
+
+h2. 7. Conclusion
+
+Thank you for working through this walkthrough tutorial. Hopefully it has helped you get a feel for working with Arvados. This tutorial covered just the basic capabilities of Arvados; there are many more capabilities to explore. Please see the links featured at the end of Section 1 for ways to learn more about Arvados or get help while you are working with it.
+
+If you would like help setting up your own production instance of Arvados, please contact us at "info@curii.com.":mailto:info@curii.com
+
diff --git a/lib/cloud/cloudtest/tester.go b/lib/cloud/cloudtest/tester.go index 5288b5c76c..087aceffad 100644 --- a/lib/cloud/cloudtest/tester.go +++ b/lib/cloud/cloudtest/tester.go @@ -12,7 +12,7 @@ import ( "time" "git.arvados.org/arvados.git/lib/cloud" - "git.arvados.org/arvados.git/lib/dispatchcloud/ssh_executor" + "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor" "git.arvados.org/arvados.git/lib/dispatchcloud/worker" "git.arvados.org/arvados.git/sdk/go/arvados" "github.com/sirupsen/logrus" @@ -48,7 +48,7 @@ type tester struct { is cloud.InstanceSet testInstance *worker.TagVerifier secret string - executor *ssh_executor.Executor + executor *sshexecutor.Executor showedLoginInfo bool failed bool @@ -308,7 +308,7 @@ func (t *tester) waitForBoot(deadline time.Time) bool { // current address. func (t *tester) updateExecutor() { if t.executor == nil { - t.executor = ssh_executor.New(t.testInstance) + t.executor = sshexecutor.New(t.testInstance) t.executor.SetTargetPort(t.SSHPort) t.executor.SetSigners(t.SSHKey) } else { diff --git a/lib/cloud/ec2/ec2.go b/lib/cloud/ec2/ec2.go index c329c1f88a..29062c491e 100644 --- a/lib/cloud/ec2/ec2.go +++ b/lib/cloud/ec2/ec2.go @@ -308,9 +308,8 @@ func (inst *ec2Instance) Destroy() error { func (inst *ec2Instance) Address() string { if inst.instance.PrivateIpAddress != nil { return *inst.instance.PrivateIpAddress - } else { - return "" } + return "" } func (inst *ec2Instance) RemoteUser() string { diff --git a/lib/config/cmd.go b/lib/config/cmd.go index 1ea0883ac8..347e8519a9 100644 --- a/lib/config/cmd.go +++ b/lib/config/cmd.go @@ -91,6 +91,7 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo flags := flag.NewFlagSet("", flag.ContinueOnError) flags.SetOutput(stderr) loader.SetupFlags(flags) + strict := flags.Bool("strict", true, "Strict validation of configuration file (warnings result in non-zero exit code)") err = flags.Parse(args) if err == flag.ErrHelp { @@ -148,15 +149,21 @@ func (checkCommand) RunCommand(prog string, args []string, stdin io.Reader, stdo fmt.Fprintln(stdout, "Your configuration is relying on deprecated entries. Suggest making the following changes.") stdout.Write(diff) err = nil - return 1 + if *strict { + return 1 + } } else if len(diff) > 0 { fmt.Fprintf(stderr, "Unexpected diff output:\n%s", diff) - return 1 + if *strict { + return 1 + } } else if err != nil { return 1 } if logbuf.Len() > 0 { - return 1 + if *strict { + return 1 + } } if problems { diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml index b1865a2217..15e7c7c06c 100644 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@ -938,6 +938,11 @@ Clusters: # Time before repeating SIGTERM when killing a container. TimeoutSignal: 5s + # Time to give up on a process (most likely arv-mount) that + # still holds a container lockfile after its main supervisor + # process has exited, and declare the instance broken. + TimeoutStaleRunLock: 5s + # Time to give up on SIGTERM and write off the worker. TimeoutTERM: 2m diff --git a/lib/config/generated_config.go b/lib/config/generated_config.go index 201ae36045..7ed332151b 100644 --- a/lib/config/generated_config.go +++ b/lib/config/generated_config.go @@ -944,6 +944,11 @@ Clusters: # Time before repeating SIGTERM when killing a container. TimeoutSignal: 5s + # Time to give up on a process (most likely arv-mount) that + # still holds a container lockfile after its main supervisor + # process has exited, and declare the instance broken. 
+ TimeoutStaleRunLock: 5s + # Time to give up on SIGTERM and write off the worker. TimeoutTERM: 2m diff --git a/lib/controller/federation/conn.go b/lib/controller/federation/conn.go index d715734c65..61cac9bbab 100644 --- a/lib/controller/federation/conn.go +++ b/lib/controller/federation/conn.go @@ -114,9 +114,8 @@ func (conn *Conn) chooseBackend(id string) backend { func (conn *Conn) localOrLoginCluster() backend { if conn.cluster.Login.LoginCluster != "" { return conn.chooseBackend(conn.cluster.Login.LoginCluster) - } else { - return conn.local } + return conn.local } // Call fn with the local backend; then, if fn returned 404, call fn @@ -204,9 +203,8 @@ func (conn *Conn) Login(ctx context.Context, options arvados.LoginOptions) (arva return arvados.LoginResponse{ RedirectLocation: target.String(), }, nil - } else { - return conn.local.Login(ctx, options) } + return conn.local.Login(ctx, options) } func (conn *Conn) Logout(ctx context.Context, options arvados.LogoutOptions) (arvados.LogoutResponse, error) { @@ -243,40 +241,39 @@ func (conn *Conn) CollectionGet(ctx context.Context, options arvados.GetOptions) c.ManifestText = rewriteManifest(c.ManifestText, options.UUID[:5]) } return c, err - } else { - // UUID is a PDH - first := make(chan arvados.Collection, 1) - err := conn.tryLocalThenRemotes(ctx, options.ForwardedFor, func(ctx context.Context, remoteID string, be backend) error { - remoteOpts := options - remoteOpts.ForwardedFor = conn.cluster.ClusterID + "-" + options.ForwardedFor - c, err := be.CollectionGet(ctx, remoteOpts) - if err != nil { - return err - } - // options.UUID is either hash+size or - // hash+size+hints; only hash+size need to - // match the computed PDH. - if pdh := arvados.PortableDataHash(c.ManifestText); pdh != options.UUID && !strings.HasPrefix(options.UUID, pdh+"+") { - err = httpErrorf(http.StatusBadGateway, "bad portable data hash %q received from remote %q (expected %q)", pdh, remoteID, options.UUID) - ctxlog.FromContext(ctx).Warn(err) - return err - } - if remoteID != "" { - c.ManifestText = rewriteManifest(c.ManifestText, remoteID) - } - select { - case first <- c: - return nil - default: - // lost race, return value doesn't matter - return nil - } - }) + } + // UUID is a PDH + first := make(chan arvados.Collection, 1) + err := conn.tryLocalThenRemotes(ctx, options.ForwardedFor, func(ctx context.Context, remoteID string, be backend) error { + remoteOpts := options + remoteOpts.ForwardedFor = conn.cluster.ClusterID + "-" + options.ForwardedFor + c, err := be.CollectionGet(ctx, remoteOpts) if err != nil { - return arvados.Collection{}, err + return err } - return <-first, nil + // options.UUID is either hash+size or + // hash+size+hints; only hash+size need to + // match the computed PDH. 
+ if pdh := arvados.PortableDataHash(c.ManifestText); pdh != options.UUID && !strings.HasPrefix(options.UUID, pdh+"+") { + err = httpErrorf(http.StatusBadGateway, "bad portable data hash %q received from remote %q (expected %q)", pdh, remoteID, options.UUID) + ctxlog.FromContext(ctx).Warn(err) + return err + } + if remoteID != "" { + c.ManifestText = rewriteManifest(c.ManifestText, remoteID) + } + select { + case first <- c: + return nil + default: + // lost race, return value doesn't matter + return nil + } + }) + if err != nil { + return arvados.Collection{}, err } + return <-first, nil } func (conn *Conn) CollectionList(ctx context.Context, options arvados.ListOptions) (arvados.CollectionList, error) { @@ -445,9 +442,8 @@ func (conn *Conn) UserList(ctx context.Context, options arvados.ListOptions) (ar return arvados.UserList{}, err } return resp, nil - } else { - return conn.generated_UserList(ctx, options) } + return conn.generated_UserList(ctx, options) } func (conn *Conn) UserCreate(ctx context.Context, options arvados.CreateOptions) (arvados.User, error) { @@ -544,7 +540,6 @@ func (notFoundError) Error() string { return "not found" } func errStatus(err error) int { if httpErr, ok := err.(interface{ HTTPStatus() int }); ok { return httpErr.HTTPStatus() - } else { - return http.StatusInternalServerError } + return http.StatusInternalServerError } diff --git a/lib/controller/localdb/login_oidc.go b/lib/controller/localdb/login_oidc.go index 9274d75d7c..e0b01f13eb 100644 --- a/lib/controller/localdb/login_oidc.go +++ b/lib/controller/localdb/login_oidc.go @@ -106,34 +106,33 @@ func (ctrl *oidcLoginController) Login(ctx context.Context, opts arvados.LoginOp // one Google account. oauth2.SetAuthURLParam("prompt", "select_account")), }, nil - } else { - // Callback after OIDC sign-in. - state := ctrl.parseOAuth2State(opts.State) - if !state.verify([]byte(ctrl.Cluster.SystemRootToken)) { - return loginError(errors.New("invalid OAuth2 state")) - } - oauth2Token, err := ctrl.oauth2conf.Exchange(ctx, opts.Code) - if err != nil { - return loginError(fmt.Errorf("error in OAuth2 exchange: %s", err)) - } - rawIDToken, ok := oauth2Token.Extra("id_token").(string) - if !ok { - return loginError(errors.New("error in OAuth2 exchange: no ID token in OAuth2 token")) - } - idToken, err := ctrl.verifier.Verify(ctx, rawIDToken) - if err != nil { - return loginError(fmt.Errorf("error verifying ID token: %s", err)) - } - authinfo, err := ctrl.getAuthInfo(ctx, oauth2Token, idToken) - if err != nil { - return loginError(err) - } - ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{ctrl.Cluster.SystemRootToken}}) - return ctrl.RailsProxy.UserSessionCreate(ctxRoot, rpc.UserSessionCreateOptions{ - ReturnTo: state.Remote + "," + state.ReturnTo, - AuthInfo: *authinfo, - }) } + // Callback after OIDC sign-in. 
+ state := ctrl.parseOAuth2State(opts.State) + if !state.verify([]byte(ctrl.Cluster.SystemRootToken)) { + return loginError(errors.New("invalid OAuth2 state")) + } + oauth2Token, err := ctrl.oauth2conf.Exchange(ctx, opts.Code) + if err != nil { + return loginError(fmt.Errorf("error in OAuth2 exchange: %s", err)) + } + rawIDToken, ok := oauth2Token.Extra("id_token").(string) + if !ok { + return loginError(errors.New("error in OAuth2 exchange: no ID token in OAuth2 token")) + } + idToken, err := ctrl.verifier.Verify(ctx, rawIDToken) + if err != nil { + return loginError(fmt.Errorf("error verifying ID token: %s", err)) + } + authinfo, err := ctrl.getAuthInfo(ctx, oauth2Token, idToken) + if err != nil { + return loginError(err) + } + ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{ctrl.Cluster.SystemRootToken}}) + return ctrl.RailsProxy.UserSessionCreate(ctxRoot, rpc.UserSessionCreateOptions{ + ReturnTo: state.Remote + "," + state.ReturnTo, + AuthInfo: *authinfo, + }) } func (ctrl *oidcLoginController) UserAuthenticate(ctx context.Context, opts arvados.UserAuthenticateOptions) (arvados.APIClientAuthorization, error) { @@ -190,9 +189,8 @@ func (ctrl *oidcLoginController) getAuthInfo(ctx context.Context, token *oauth2. // only the "fix config" advice to the user. ctxlog.FromContext(ctx).WithError(err).WithField("email", ret.Email).Error("People API is not enabled") return nil, errors.New("configuration error: Login.GoogleAlternateEmailAddresses is true, but Google People API is not enabled") - } else { - return nil, fmt.Errorf("error getting profile info from People API: %s", err) } + return nil, fmt.Errorf("error getting profile info from People API: %s", err) } // The given/family names returned by the People API and diff --git a/lib/crunchrun/background.go b/lib/crunchrun/background.go index bf039afa0a..8cdba72c10 100644 --- a/lib/crunchrun/background.go +++ b/lib/crunchrun/background.go @@ -218,6 +218,24 @@ func ListProcesses(stdout, stderr io.Writer) int { return nil } + proc, err := os.FindProcess(pi.PID) + if err != nil { + // FindProcess should have succeeded, even if the + // process does not exist. + fmt.Fprintf(stderr, "%s: find process %d: %s", path, pi.PID, err) + return nil + } + err = proc.Signal(syscall.Signal(0)) + if err != nil { + // Process is dead, even though lockfile was + // still locked. Most likely a stuck arv-mount + // process that inherited the lock from + // crunch-run. Report container UUID as + // "stale". 
+ fmt.Fprintln(stdout, pi.UUID, "stale") + return nil + } + fmt.Fprintln(stdout, pi.UUID) return nil })) diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go index e8c7660d1a..55cc6ee564 100644 --- a/lib/crunchrun/crunchrun_test.go +++ b/lib/crunchrun/crunchrun_test.go @@ -157,9 +157,8 @@ func (t *TestDockerClient) ContainerStart(ctx context.Context, container string, if container == "abcde" { // t.fn gets executed in ContainerWait return nil - } else { - return errors.New("Invalid container id") } + return errors.New("Invalid container id") } func (t *TestDockerClient) ContainerRemove(ctx context.Context, container string, options dockertypes.ContainerRemoveOptions) error { @@ -196,9 +195,8 @@ func (t *TestDockerClient) ImageInspectWithRaw(ctx context.Context, image string if t.imageLoaded == image { return dockertypes.ImageInspect{}, nil, nil - } else { - return dockertypes.ImageInspect{}, nil, errors.New("") } + return dockertypes.ImageInspect{}, nil, errors.New("") } func (t *TestDockerClient) ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error) { @@ -208,10 +206,9 @@ func (t *TestDockerClient) ImageLoad(ctx context.Context, input io.Reader, quiet _, err := io.Copy(ioutil.Discard, input) if err != nil { return dockertypes.ImageLoadResponse{}, err - } else { - t.imageLoaded = hwImageId - return dockertypes.ImageLoadResponse{Body: ioutil.NopCloser(input)}, nil } + t.imageLoaded = hwImageId + return dockertypes.ImageLoadResponse{Body: ioutil.NopCloser(input)}, nil } func (*TestDockerClient) ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error) { @@ -260,9 +257,8 @@ func (client *ArvTestClient) Call(method, resourceType, uuid, action string, par case method == "GET" && resourceType == "containers" && action == "secret_mounts": if client.secretMounts != nil { return json.Unmarshal(client.secretMounts, output) - } else { - return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output) } + return json.Unmarshal([]byte(`{"secret_mounts":{}}`), output) default: return fmt.Errorf("Not found") } diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go index 45b346383f..a1ff414b73 100644 --- a/lib/dispatchcloud/container/queue.go +++ b/lib/dispatchcloud/container/queue.go @@ -382,7 +382,7 @@ func (cq *Queue) poll() (map[string]*arvados.Container, error) { *next[upd.UUID] = upd } } - selectParam := []string{"uuid", "state", "priority", "runtime_constraints", "container_image", "mounts", "scheduling_parameters"} + selectParam := []string{"uuid", "state", "priority", "runtime_constraints", "container_image", "mounts", "scheduling_parameters", "created_at"} limitParam := 1000 mine, err := cq.fetchAll(arvados.ResourceListParams{ diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go index 02b6c976ae..7614a143ab 100644 --- a/lib/dispatchcloud/dispatcher.go +++ b/lib/dispatchcloud/dispatcher.go @@ -17,7 +17,7 @@ import ( "git.arvados.org/arvados.git/lib/cloud" "git.arvados.org/arvados.git/lib/dispatchcloud/container" "git.arvados.org/arvados.git/lib/dispatchcloud/scheduler" - "git.arvados.org/arvados.git/lib/dispatchcloud/ssh_executor" + "git.arvados.org/arvados.git/lib/dispatchcloud/sshexecutor" "git.arvados.org/arvados.git/lib/dispatchcloud/worker" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/auth" @@ -100,7 +100,7 @@ func (disp *dispatcher) Close() { // Make a 
worker.Executor for the given instance. func (disp *dispatcher) newExecutor(inst cloud.Instance) worker.Executor { - exr := ssh_executor.New(inst) + exr := sshexecutor.New(inst) exr.SetTargetPort(disp.Cluster.Containers.CloudVMs.SSHPort) exr.SetSigners(disp.sshKey) return exr @@ -181,7 +181,7 @@ func (disp *dispatcher) run() { if pollInterval <= 0 { pollInterval = defaultPollInterval } - sched := scheduler.New(disp.Context, disp.queue, disp.pool, staleLockTimeout, pollInterval) + sched := scheduler.New(disp.Context, disp.queue, disp.pool, disp.Registry, staleLockTimeout, pollInterval) sched.Start() defer sched.Stop() diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go index 6e1850410b..d5d90bf351 100644 --- a/lib/dispatchcloud/dispatcher_test.go +++ b/lib/dispatchcloud/dispatcher_test.go @@ -66,6 +66,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) { ProbeInterval: arvados.Duration(5 * time.Millisecond), MaxProbesPerSecond: 1000, TimeoutSignal: arvados.Duration(3 * time.Millisecond), + TimeoutStaleRunLock: arvados.Duration(3 * time.Millisecond), TimeoutTERM: arvados.Duration(20 * time.Millisecond), ResourceTags: map[string]string{"testtag": "test value"}, TagKeyPrefix: "test:", @@ -169,6 +170,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { stubvm.ReportBroken = time.Now().Add(time.Duration(rand.Int63n(200)) * time.Millisecond) default: stubvm.CrunchRunCrashRate = 0.1 + stubvm.ArvMountDeadlockRate = 0.1 } } s.stubDriver.Bugf = c.Errorf @@ -221,6 +223,14 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) { c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds{quantile="0.95"} [0-9.]*`) c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds_count [0-9]*`) c.Check(resp.Body.String(), check.Matches, `(?ms).*time_to_ready_for_container_seconds_sum [0-9.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_count [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_shutdown_request_to_disappearance_seconds_sum [0-9.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_queue_to_crunch_run_seconds_count [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*time_from_queue_to_crunch_run_seconds_sum [0-9e+.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_count{outcome="success"} [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_sum{outcome="success"} [0-9e+.]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_count{outcome="fail"} [0-9]*`) + c.Check(resp.Body.String(), check.Matches, `(?ms).*run_probe_duration_seconds_sum{outcome="fail"} [0-9e+.]*`) } func (s *DispatcherSuite) TestAPIPermissions(c *check.C) { diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go index 0e8e1dc2ec..b9d653a821 100644 --- a/lib/dispatchcloud/scheduler/run_queue.go +++ b/lib/dispatchcloud/scheduler/run_queue.go @@ -33,6 +33,7 @@ func (sch *Scheduler) runQueue() { dontstart := map[arvados.InstanceType]bool{} var overquota []container.QueueEnt // entries that are unmappable because of worker pool quota + var containerAllocatedWorkerBootingCount int tryrun: for i, ctr := range sorted { @@ -92,11 +93,15 @@ tryrun: } else if sch.pool.StartContainer(it, ctr) { // Success. 
} else { + containerAllocatedWorkerBootingCount += 1 dontstart[it] = true } } } + sch.mContainersAllocatedNotStarted.Set(float64(containerAllocatedWorkerBootingCount)) + sch.mContainersNotAllocatedOverQuota.Set(float64(len(overquota))) + if len(overquota) > 0 { // Unlock any containers that are unmappable while // we're at quota. diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go index 530eb5db93..fd1d0a870b 100644 --- a/lib/dispatchcloud/scheduler/run_queue_test.go +++ b/lib/dispatchcloud/scheduler/run_queue_test.go @@ -13,6 +13,9 @@ import ( "git.arvados.org/arvados.git/lib/dispatchcloud/worker" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" + + "github.com/prometheus/client_golang/prometheus/testutil" + check "gopkg.in/check.v1" ) @@ -185,7 +188,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) { running: map[string]time.Time{}, canCreate: 0, } - New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue() + New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue() c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1), test.InstanceType(1), test.InstanceType(1)}) c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)}) c.Check(pool.running, check.HasLen, 1) @@ -241,7 +244,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) { starts: []string{}, canCreate: 0, } - New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue() + New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue() c.Check(pool.creates, check.DeepEquals, shouldCreate) if len(shouldCreate) == 0 { c.Check(pool.starts, check.DeepEquals, []string{}) @@ -336,7 +339,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) { }, } queue.Update() - New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue() + New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond).runQueue() c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)}) c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]}) running := map[string]bool{} @@ -380,10 +383,87 @@ func (*SchedulerSuite) TestKillNonexistentContainer(c *check.C) { }, } queue.Update() - sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond) + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) c.Check(pool.running, check.HasLen, 1) sch.sync() for deadline := time.Now().Add(time.Second); len(pool.Running()) > 0 && time.Now().Before(deadline); time.Sleep(time.Millisecond) { } c.Check(pool.Running(), check.HasLen, 0) } + +func (*SchedulerSuite) TestContainersMetrics(c *check.C) { + ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c)) + queue := test.Queue{ + ChooseType: chooseType, + Containers: []arvados.Container{ + { + UUID: test.ContainerUUID(1), + Priority: 1, + State: arvados.ContainerStateLocked, + CreatedAt: time.Now().Add(-10 * time.Second), + RuntimeConstraints: arvados.RuntimeConstraints{ + VCPUs: 1, + RAM: 1 << 30, + }, + }, + }, + } + queue.Update() + + // Create a pool with one unallocated (idle/booting/unknown) worker, + // and `idle` and `unknown` not set (empty). In other words, this worker is in the booting + // state, and the container will be allocated but not started yet. 
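The new TestContainersMetrics assertions read gauge values through prometheus's testutil helper, which collects a single metric and returns its current value. A self-contained illustration, with the metric name chosen to echo the one the scheduler registers:

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/testutil"
    )

    func main() {
        g := prometheus.NewGauge(prometheus.GaugeOpts{
            Name: "containers_allocated_not_started",
            Help: "Example gauge standing in for the scheduler's.",
        })
        g.Set(1)
        // testutil.ToFloat64 collects the metric and returns its value;
        // the test applies the same call to the scheduler's gauges.
        fmt.Println(int(testutil.ToFloat64(g))) // prints 1
    }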
+ pool := stubPool{ + unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1}, + } + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) + sch.runQueue() + sch.updateMetrics() + + c.Check(int(testutil.ToFloat64(sch.mContainersAllocatedNotStarted)), check.Equals, 1) + c.Check(int(testutil.ToFloat64(sch.mContainersNotAllocatedOverQuota)), check.Equals, 0) + c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 10) + + // Create a pool without workers. The queued container will not be started, and the + // 'over quota' metric will be 1 because no workers are available and canCreate defaults + // to zero. + pool = stubPool{} + sch = New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) + sch.runQueue() + sch.updateMetrics() + + c.Check(int(testutil.ToFloat64(sch.mContainersAllocatedNotStarted)), check.Equals, 0) + c.Check(int(testutil.ToFloat64(sch.mContainersNotAllocatedOverQuota)), check.Equals, 1) + c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 10) + + // Reset the queue, and create a pool with an idle worker. The queued + // container will be started immediately and mLongestWaitTimeSinceQueue + // should be zero. + queue = test.Queue{ + ChooseType: chooseType, + Containers: []arvados.Container{ + { + UUID: test.ContainerUUID(1), + Priority: 1, + State: arvados.ContainerStateLocked, + CreatedAt: time.Now().Add(-10 * time.Second), + RuntimeConstraints: arvados.RuntimeConstraints{ + VCPUs: 1, + RAM: 1 << 30, + }, + }, + }, + } + queue.Update() + + pool = stubPool{ + idle: map[arvados.InstanceType]int{test.InstanceType(1): 1}, + unalloc: map[arvados.InstanceType]int{test.InstanceType(1): 1}, + running: map[string]time.Time{}, + } + sch = New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) + sch.runQueue() + sch.updateMetrics() + + c.Check(int(testutil.ToFloat64(sch.mLongestWaitTimeSinceQueue)), check.Equals, 0) +} diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go index 6409ea031a..c3e67dd11f 100644 --- a/lib/dispatchcloud/scheduler/scheduler.go +++ b/lib/dispatchcloud/scheduler/scheduler.go @@ -11,7 +11,9 @@ import ( "sync" "time" + "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" ) @@ -31,6 +33,7 @@ type Scheduler struct { logger logrus.FieldLogger queue ContainerQueue pool WorkerPool + reg *prometheus.Registry staleLockTimeout time.Duration queueUpdateInterval time.Duration @@ -41,17 +44,22 @@ type Scheduler struct { runOnce sync.Once stop chan struct{} stopped chan struct{} + + mContainersAllocatedNotStarted prometheus.Gauge + mContainersNotAllocatedOverQuota prometheus.Gauge + mLongestWaitTimeSinceQueue prometheus.Gauge } // New returns a new unstarted Scheduler. // // Any given queue and pool should not be used by more than one // scheduler at a time. 
-func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler { - return &Scheduler{ +func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, reg *prometheus.Registry, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler { + sch := &Scheduler{ logger: ctxlog.FromContext(ctx), queue: queue, pool: pool, + reg: reg, staleLockTimeout: staleLockTimeout, queueUpdateInterval: queueUpdateInterval, wakeup: time.NewTimer(time.Second), @@ -59,6 +67,59 @@ func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTi stopped: make(chan struct{}), uuidOp: map[string]string{}, } + sch.registerMetrics(reg) + return sch +} + +func (sch *Scheduler) registerMetrics(reg *prometheus.Registry) { + if reg == nil { + reg = prometheus.NewRegistry() + } + sch.mContainersAllocatedNotStarted = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_allocated_not_started", + Help: "Number of containers allocated to a worker but not started yet (worker is booting).", + }) + reg.MustRegister(sch.mContainersAllocatedNotStarted) + sch.mContainersNotAllocatedOverQuota = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_not_allocated_over_quota", + Help: "Number of containers not allocated to a worker because the system has hit a quota.", + }) + reg.MustRegister(sch.mContainersNotAllocatedOverQuota) + sch.mLongestWaitTimeSinceQueue = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_longest_wait_time_seconds", + Help: "Current longest wait time of any container since queuing, and before the start of crunch-run.", + }) + reg.MustRegister(sch.mLongestWaitTimeSinceQueue) +} + +func (sch *Scheduler) updateMetrics() { + earliest := time.Time{} + entries, _ := sch.queue.Entries() + running := sch.pool.Running() + for _, ent := range entries { + if ent.Container.Priority > 0 && + (ent.Container.State == arvados.ContainerStateQueued || ent.Container.State == arvados.ContainerStateLocked) { + // Exclude containers that are preparing to run the payload (i.e. + // ContainerStateLocked and running on a worker, most likely loading the + // payload image). + if _, ok := running[ent.Container.UUID]; !ok { + if ent.Container.CreatedAt.Before(earliest) || earliest.IsZero() { + earliest = ent.Container.CreatedAt + } + } + } + } + if !earliest.IsZero() { + sch.mLongestWaitTimeSinceQueue.Set(time.Since(earliest).Seconds()) + } else { + sch.mLongestWaitTimeSinceQueue.Set(0) + } } // Start starts the scheduler. 
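registerMetrics above accepts a possibly-nil registry and substitutes a private one, so the gauge fields are always usable even when callers (such as the tests in this change) pass nil. A minimal sketch of that defensive pattern; the type and metric names here are illustrative only:

    package main

    import (
        "fmt"

        "github.com/prometheus/client_golang/prometheus"
    )

    type scheduler struct {
        gauge prometheus.Gauge
    }

    func newScheduler(reg *prometheus.Registry) *scheduler {
        if reg == nil {
            // Nobody will scrape this registry, but registering on it
            // keeps the gauge field non-nil so Set() never panics.
            reg = prometheus.NewRegistry()
        }
        s := &scheduler{gauge: prometheus.NewGauge(prometheus.GaugeOpts{
            Name: "example_gauge",
            Help: "Illustrative gauge.",
        })}
        reg.MustRegister(s.gauge)
        return s
    }

    func main() {
        s := newScheduler(nil) // safe: metrics land in a throwaway registry
        s.gauge.Set(42)
        fmt.Println("ok")
    }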
@@ -113,6 +174,7 @@ func (sch *Scheduler) run() { for { sch.runQueue() sch.sync() + sch.updateMetrics() select { case <-sch.stop: return diff --git a/lib/dispatchcloud/scheduler/sync_test.go b/lib/dispatchcloud/scheduler/sync_test.go index 538f5ea8cf..a3ff0636e1 100644 --- a/lib/dispatchcloud/scheduler/sync_test.go +++ b/lib/dispatchcloud/scheduler/sync_test.go @@ -48,7 +48,7 @@ func (*SchedulerSuite) TestForgetIrrelevantContainers(c *check.C) { ents, _ := queue.Entries() c.Check(ents, check.HasLen, 1) - sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond) + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) sch.sync() ents, _ = queue.Entries() @@ -80,7 +80,7 @@ func (*SchedulerSuite) TestCancelOrphanedContainers(c *check.C) { ents, _ := queue.Entries() c.Check(ents, check.HasLen, 1) - sch := New(ctx, &queue, &pool, time.Millisecond, time.Millisecond) + sch := New(ctx, &queue, &pool, nil, time.Millisecond, time.Millisecond) // Sync shouldn't cancel the container because it might be // running on the VM with state=="unknown". diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/sshexecutor/executor.go similarity index 98% rename from lib/dispatchcloud/ssh_executor/executor.go rename to lib/dispatchcloud/sshexecutor/executor.go index 79b82e6c37..c37169921c 100644 --- a/lib/dispatchcloud/ssh_executor/executor.go +++ b/lib/dispatchcloud/sshexecutor/executor.go @@ -2,9 +2,9 @@ // // SPDX-License-Identifier: AGPL-3.0 -// Package ssh_executor provides an implementation of pool.Executor +// Package sshexecutor provides an implementation of pool.Executor // using a long-lived multiplexed SSH session. -package ssh_executor +package sshexecutor import ( "bytes" diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/sshexecutor/executor_test.go similarity index 99% rename from lib/dispatchcloud/ssh_executor/executor_test.go rename to lib/dispatchcloud/sshexecutor/executor_test.go index b7f3aadd8a..b4afeafa82 100644 --- a/lib/dispatchcloud/ssh_executor/executor_test.go +++ b/lib/dispatchcloud/sshexecutor/executor_test.go @@ -2,7 +2,7 @@ // // SPDX-License-Identifier: AGPL-3.0 -package ssh_executor +package sshexecutor import ( "bytes" diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go index 132bd4d695..4d32cf221c 100644 --- a/lib/dispatchcloud/test/stub_driver.go +++ b/lib/dispatchcloud/test/stub_driver.go @@ -131,7 +131,7 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags: copyTags(tags), providerType: it.ProviderType, initCommand: cmd, - running: map[string]int64{}, + running: map[string]stubProcess{}, killing: map[string]bool{}, } svm.SSHService = SSHService{ @@ -189,6 +189,8 @@ type StubVM struct { CrunchRunMissing bool CrunchRunCrashRate float64 CrunchRunDetachDelay time.Duration + ArvMountMaxExitLag time.Duration + ArvMountDeadlockRate float64 ExecuteContainer func(arvados.Container) int CrashRunningContainer func(arvados.Container) @@ -198,12 +200,21 @@ type StubVM struct { initCommand cloud.InitCommand providerType string SSHService SSHService - running map[string]int64 + running map[string]stubProcess killing map[string]bool lastPID int64 + deadlocked string sync.Mutex } +type stubProcess struct { + pid int64 + + // crunch-run has exited, but arv-mount process (or something) + // still holds lock in /var/run/ + exited bool +} + func (svm *StubVM) Instance() stubInstance { svm.Lock() defer svm.Unlock() @@ -256,7 +267,7 @@ func (svm 
*StubVM) Exec(env map[string]string, command string, stdin io.Reader, svm.Lock() svm.lastPID++ pid := svm.lastPID - svm.running[uuid] = pid + svm.running[uuid] = stubProcess{pid: pid} svm.Unlock() time.Sleep(svm.CrunchRunDetachDelay) fmt.Fprintf(stderr, "starting %s\n", uuid) @@ -273,14 +284,13 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, logger.Print("[test] exiting crunch-run stub") svm.Lock() defer svm.Unlock() - if svm.running[uuid] != pid { + if svm.running[uuid].pid != pid { bugf := svm.sis.driver.Bugf if bugf == nil { bugf = logger.Warnf } - bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s]==%d", pid, uuid, svm.running[uuid]) - } else { - delete(svm.running, uuid) + bugf("[test] StubDriver bug or caller bug: pid %d exiting, running[%s].pid==%d", pid, uuid, svm.running[uuid].pid) + return } if !completed { logger.WithField("State", ctr.State).Print("[test] crashing crunch-run stub") @@ -288,6 +298,15 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, svm.CrashRunningContainer(ctr) } } + sproc := svm.running[uuid] + sproc.exited = true + svm.running[uuid] = sproc + svm.Unlock() + time.Sleep(svm.ArvMountMaxExitLag * time.Duration(math_rand.Float64())) + svm.Lock() + if math_rand.Float64() >= svm.ArvMountDeadlockRate { + delete(svm.running, uuid) + } }() crashluck := math_rand.Float64() @@ -333,26 +352,31 @@ func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, if command == "crunch-run --list" { svm.Lock() defer svm.Unlock() - for uuid := range svm.running { - fmt.Fprintf(stdout, "%s\n", uuid) + for uuid, sproc := range svm.running { + if sproc.exited { + fmt.Fprintf(stdout, "%s stale\n", uuid) + } else { + fmt.Fprintf(stdout, "%s\n", uuid) + } } if !svm.ReportBroken.IsZero() && svm.ReportBroken.Before(time.Now()) { fmt.Fprintln(stdout, "broken") } + fmt.Fprintln(stdout, svm.deadlocked) return 0 } if strings.HasPrefix(command, "crunch-run --kill ") { svm.Lock() - _, running := svm.running[uuid] - if running { + sproc, running := svm.running[uuid] + if running && !sproc.exited { svm.killing[uuid] = true svm.Unlock() time.Sleep(time.Duration(math_rand.Float64()*2) * time.Millisecond) svm.Lock() - _, running = svm.running[uuid] + sproc, running = svm.running[uuid] } svm.Unlock() - if running { + if running && !sproc.exited { fmt.Fprintf(stderr, "%s: container is running\n", uuid) return 1 } diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go index 086887cb44..a25ed60150 100644 --- a/lib/dispatchcloud/worker/pool.go +++ b/lib/dispatchcloud/worker/pool.go @@ -64,15 +64,16 @@ type Executor interface { } const ( - defaultSyncInterval = time.Minute - defaultProbeInterval = time.Second * 10 - defaultMaxProbesPerSecond = 10 - defaultTimeoutIdle = time.Minute - defaultTimeoutBooting = time.Minute * 10 - defaultTimeoutProbe = time.Minute * 10 - defaultTimeoutShutdown = time.Second * 10 - defaultTimeoutTERM = time.Minute * 2 - defaultTimeoutSignal = time.Second * 5 + defaultSyncInterval = time.Minute + defaultProbeInterval = time.Second * 10 + defaultMaxProbesPerSecond = 10 + defaultTimeoutIdle = time.Minute + defaultTimeoutBooting = time.Minute * 10 + defaultTimeoutProbe = time.Minute * 10 + defaultTimeoutShutdown = time.Second * 10 + defaultTimeoutTERM = time.Minute * 2 + defaultTimeoutSignal = time.Second * 5 + defaultTimeoutStaleRunLock = time.Second * 5 // Time after a quota error to try again anyway, even if no // instances have been shutdown. 
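The pool metrics added in the following hunks are prometheus Summaries with explicit quantile objectives: the map keys are quantiles, the values the allowed estimation error for each. A standalone sketch of declaring one and feeding it a duration observation:

    package main

    import (
        "fmt"
        "time"

        "github.com/prometheus/client_golang/prometheus"
    )

    func main() {
        s := prometheus.NewSummary(prometheus.SummaryOpts{
            Namespace: "arvados",
            Subsystem: "dispatchcloud",
            Name:      "example_duration_seconds",
            Help:      "Example summary.",
            // Track the median and tail quantiles, each with an error bound.
            Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
        })
        prometheus.MustRegister(s)
        start := time.Now()
        time.Sleep(10 * time.Millisecond) // stand-in for the measured operation
        s.Observe(time.Since(start).Seconds())
        fmt.Println("observed one sample")
    }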
@@ -85,9 +86,8 @@ const ( func duration(conf arvados.Duration, def time.Duration) time.Duration { if conf > 0 { return time.Duration(conf) - } else { - return def } + return def } // NewPool creates a Pool of workers backed by instanceSet. @@ -115,6 +115,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe timeoutShutdown: duration(cluster.Containers.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown), timeoutTERM: duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM), timeoutSignal: duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal), + timeoutStaleRunLock: duration(cluster.Containers.CloudVMs.TimeoutStaleRunLock, defaultTimeoutStaleRunLock), installPublicKey: installPublicKey, tagKeyPrefix: cluster.Containers.CloudVMs.TagKeyPrefix, stop: make(chan bool), @@ -152,6 +153,7 @@ type Pool struct { timeoutShutdown time.Duration timeoutTERM time.Duration timeoutSignal time.Duration + timeoutStaleRunLock time.Duration installPublicKey ssh.PublicKey tagKeyPrefix string @@ -170,15 +172,18 @@ type Pool struct { runnerMD5 [md5.Size]byte runnerCmd string - mContainersRunning prometheus.Gauge - mInstances *prometheus.GaugeVec - mInstancesPrice *prometheus.GaugeVec - mVCPUs *prometheus.GaugeVec - mMemory *prometheus.GaugeVec - mBootOutcomes *prometheus.CounterVec - mDisappearances *prometheus.CounterVec - mTimeToSSH prometheus.Summary - mTimeToReadyForContainer prometheus.Summary + mContainersRunning prometheus.Gauge + mInstances *prometheus.GaugeVec + mInstancesPrice *prometheus.GaugeVec + mVCPUs *prometheus.GaugeVec + mMemory *prometheus.GaugeVec + mBootOutcomes *prometheus.CounterVec + mDisappearances *prometheus.CounterVec + mTimeToSSH prometheus.Summary + mTimeToReadyForContainer prometheus.Summary + mTimeFromShutdownToGone prometheus.Summary + mTimeFromQueueToCrunchRun prometheus.Summary + mRunProbeDuration *prometheus.SummaryVec } type createCall struct { @@ -661,6 +666,30 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) { Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, }) reg.MustRegister(wp.mTimeToReadyForContainer) + wp.mTimeFromShutdownToGone = prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "instances_time_from_shutdown_request_to_disappearance_seconds", + Help: "Number of seconds between the first shutdown attempt and the disappearance of the worker.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }) + reg.MustRegister(wp.mTimeFromShutdownToGone) + wp.mTimeFromQueueToCrunchRun = prometheus.NewSummary(prometheus.SummaryOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "containers_time_from_queue_to_crunch_run_seconds", + Help: "Number of seconds between the queuing of a container and the start of crunch-run.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }) + reg.MustRegister(wp.mTimeFromQueueToCrunchRun) + wp.mRunProbeDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{ + Namespace: "arvados", + Subsystem: "dispatchcloud", + Name: "instances_run_probe_duration_seconds", + Help: "Number of seconds per runProbe call.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001}, + }, []string{"outcome"}) + reg.MustRegister(wp.mRunProbeDuration) } func (wp *Pool) runMetrics() { @@ -930,6 +959,10 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) { if wp.mDisappearances != nil 
{ wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc() } + // wkr.destroyed.IsZero() can happen if instance disappeared but we weren't trying to shut it down + if wp.mTimeFromShutdownToGone != nil && !wkr.destroyed.IsZero() { + wp.mTimeFromShutdownToGone.Observe(time.Now().Sub(wkr.destroyed).Seconds()) + } delete(wp.workers, id) go wkr.Close() notify = true diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go index 9199d4bafe..9e89d7daaf 100644 --- a/lib/dispatchcloud/worker/worker.go +++ b/lib/dispatchcloud/worker/worker.go @@ -110,6 +110,7 @@ type worker struct { probing chan struct{} bootOutcomeReported bool timeToReadyReported bool + staleRunLockSince time.Time } func (wkr *worker) onUnkillable(uuid string) { @@ -176,6 +177,9 @@ func (wkr *worker) startContainer(ctr arvados.Container) { } go func() { rr.Start() + if wkr.wp.mTimeFromQueueToCrunchRun != nil { + wkr.wp.mTimeFromQueueToCrunchRun.Observe(time.Since(ctr.CreatedAt).Seconds()) + } wkr.mtx.Lock() defer wkr.mtx.Unlock() now := time.Now() @@ -188,7 +192,7 @@ func (wkr *worker) startContainer(ctr arvados.Container) { } // ProbeAndUpdate conducts appropriate boot/running probes (if any) -// for the worker's curent state. If a previous probe is still +// for the worker's current state. If a previous probe is still // running, it does nothing. // // It should be called in a new goroutine. @@ -372,6 +376,7 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) { if u := wkr.instance.RemoteUser(); u != "root" { cmd = "sudo " + cmd } + before := time.Now() stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil) if err != nil { wkr.logger.WithFields(logrus.Fields{ @@ -379,16 +384,48 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) { "stdout": string(stdout), "stderr": string(stderr), }).WithError(err).Warn("probe failed") + wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds()) return } + wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds()) ok = true + + staleRunLock := false for _, s := range strings.Split(string(stdout), "\n") { - if s == "broken" { + // Each line of the "crunch-run --list" output is one + // of the following: + // + // * a container UUID, indicating that processes + // related to that container are currently running. + // Optionally followed by " stale", indicating that + // the crunch-run process itself has exited (the + // remaining process is probably arv-mount). + // + // * the string "broken", indicating that the instance + // appears incapable of starting containers. + // + // See ListProcesses() in lib/crunchrun/background.go. 
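The line formats enumerated in the comment above are easy to exercise in isolation. A sketch of the same parsing over canned "crunch-run --list" output (the UUIDs are made up):

    package main

    import (
        "fmt"
        "strings"
    )

    func main() {
        stdout := "zzzzz-dz642-aaaaaaaaaaaaaaa\nzzzzz-dz642-bbbbbbbbbbbbbbb stale\nbroken\n"
        var running, stale []string
        reportsBroken := false
        for _, s := range strings.Split(stdout, "\n") {
            switch {
            case s == "":
                // empty string following the final newline
            case s == "broken":
                reportsBroken = true
            default:
                if toks := strings.Split(s, " "); len(toks) == 1 {
                    running = append(running, s)
                } else if toks[1] == "stale" {
                    stale = append(stale, toks[0])
                }
            }
        }
        fmt.Println(running, stale, reportsBroken)
    }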
+ if s == "" { + // empty string following final newline + } else if s == "broken" { reportsBroken = true - } else if s != "" { + } else if toks := strings.Split(s, " "); len(toks) == 1 { running = append(running, s) + } else if toks[1] == "stale" { + wkr.logger.WithField("ContainerUUID", toks[0]).Info("probe reported stale run lock") + staleRunLock = true } } + wkr.mtx.Lock() + defer wkr.mtx.Unlock() + if !staleRunLock { + wkr.staleRunLockSince = time.Time{} + } else if wkr.staleRunLockSince.IsZero() { + wkr.staleRunLockSince = time.Now() + } else if dur := time.Now().Sub(wkr.staleRunLockSince); dur > wkr.wp.timeoutStaleRunLock { + wkr.logger.WithField("Duration", dur).Warn("reporting broken after reporting stale run lock for too long") + reportsBroken = true + } return } diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go index a4c2a6370f..cfb7a1bfb7 100644 --- a/lib/dispatchcloud/worker/worker_test.go +++ b/lib/dispatchcloud/worker/worker_test.go @@ -17,6 +17,7 @@ import ( "git.arvados.org/arvados.git/lib/dispatchcloud/test" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/ctxlog" + "github.com/prometheus/client_golang/prometheus" check "gopkg.in/check.v1" ) @@ -239,6 +240,7 @@ func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) { runnerData: trial.deployRunner, runnerMD5: md5.Sum(trial.deployRunner), } + wp.registerMetrics(prometheus.NewRegistry()) if trial.deployRunner != nil { svHash := md5.Sum(trial.deployRunner) wp.runnerCmd = fmt.Sprintf("/var/run/arvados/crunch-run~%x", svHash) diff --git a/lib/install/deps.go b/lib/install/deps.go index ba57c20c35..93a0ce452b 100644 --- a/lib/install/deps.go +++ b/lib/install/deps.go @@ -104,7 +104,7 @@ func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, st "ca-certificates", "cadaver", "curl", - "cython", + "cython3", "daemontools", // lib/boot uses setuidgid to drop privileges when running as root "default-jdk-headless", "default-jre-headless", @@ -127,7 +127,6 @@ func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, st "libpam-dev", "libpcre3-dev", "libpq-dev", - "libpython2.7-dev", "libreadline-dev", "libssl-dev", "libwww-perl", @@ -142,13 +141,12 @@ func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, st "pkg-config", "postgresql", "postgresql-contrib", - "python", "python3-dev", - "python-epydoc", "r-base", "r-cran-testthat", "sudo", - "virtualenv", + "python3-virtualenv", + "python3-venv", "wget", "xvfb", "zlib1g-dev", diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py index dcbe03a057..4bfe272789 100644 --- a/sdk/cwl/arvados_cwl/__init__.py +++ b/sdk/cwl/arvados_cwl/__init__.py @@ -192,6 +192,10 @@ def arg_parser(): # type: () -> argparse.ArgumentParser action="store_false", default=True, help=argparse.SUPPRESS) + parser.add_argument("--disable-color", dest="enable_color", + action="store_false", default=True, + help=argparse.SUPPRESS) + parser.add_argument("--disable-js-validation", action="store_true", default=False, help=argparse.SUPPRESS) diff --git a/sdk/cwl/arvados_cwl/arvcontainer.py b/sdk/cwl/arvados_cwl/arvcontainer.py index fb23c2ccf7..99d82f3398 100644 --- a/sdk/cwl/arvados_cwl/arvcontainer.py +++ b/sdk/cwl/arvados_cwl/arvcontainer.py @@ -325,8 +325,8 @@ class ArvadosContainer(JobBase): logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"]) else: logger.info("%s %s state is %s", self.arvrunner.label(self), 
response["uuid"], response["state"]) - except Exception: - logger.exception("%s got an error", self.arvrunner.label(self)) + except Exception as e: + logger.exception("%s error submitting container\n%s", self.arvrunner.label(self), e) logger.debug("Container request was %s", container_request) self.output_callback({}, "permanentFail") @@ -475,6 +475,7 @@ class RunnerContainer(Runner): "--api=containers", "--no-log-timestamps", "--disable-validate", + "--disable-color", "--eval-timeout=%s" % self.arvrunner.eval_timeout, "--thread-count=%s" % self.arvrunner.thread_count, "--enable-reuse" if self.enable_reuse else "--disable-reuse", diff --git a/sdk/cwl/arvados_cwl/executor.py b/sdk/cwl/arvados_cwl/executor.py index 9ba798ec64..68141586de 100644 --- a/sdk/cwl/arvados_cwl/executor.py +++ b/sdk/cwl/arvados_cwl/executor.py @@ -524,6 +524,8 @@ The 'jobs' API is no longer supported. def arv_executor(self, updated_tool, job_order, runtimeContext, logger=None): self.debug = runtimeContext.debug + logger.info("Using cluster %s (%s)", self.api.config()["ClusterID"], self.api.config()["Services"]["Controller"]["ExternalURL"]) + updated_tool.visit(self.check_features) self.project_uuid = runtimeContext.project_uuid diff --git a/sdk/cwl/tests/test_submit.py b/sdk/cwl/tests/test_submit.py index 4da545bf36..d2dd6e8162 100644 --- a/sdk/cwl/tests/test_submit.py +++ b/sdk/cwl/tests/test_submit.py @@ -302,7 +302,7 @@ def stubs(func): 'secret_mounts': {}, 'state': 'Committed', 'command': ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'], @@ -412,7 +412,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = [ 'arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--disable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', @@ -436,7 +436,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = [ 'arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--disable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'] @@ -468,7 +468,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=stop', @@ -491,7 +491,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', 
'--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", "--output-name="+output_name, '--debug', '--on-error=continue', @@ -513,7 +513,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", "--debug", "--storage-classes=foo", '--on-error=continue', @@ -576,7 +576,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', @@ -599,7 +599,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', @@ -623,7 +623,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", "--output-tags="+output_tags, '--debug', '--on-error=continue', @@ -700,7 +700,7 @@ class TestSubmit(unittest.TestCase): 'name': 'expect_arvworkflow.cwl#main', 'container_image': '999999999999999999999999999999d3+99', 'command': ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', '/var/lib/cwl/workflow/expect_arvworkflow.cwl#main', '/var/lib/cwl/cwl.input.json'], @@ -795,7 +795,7 @@ class TestSubmit(unittest.TestCase): 'name': 'a test workflow', 'container_image': "999999999999999999999999999999d3+99", 'command': ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'], @@ -859,7 +859,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["owner_uuid"] = project_uuid expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', "--eval-timeout=20", "--thread-count=1", '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', @@ -881,7 +881,7 @@ class 
TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=60.0', '--thread-count=1', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', @@ -902,7 +902,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=500", '--debug', '--on-error=continue', @@ -924,7 +924,7 @@ class TestSubmit(unittest.TestCase): expect_container = copy.deepcopy(stubs.expect_container_spec) expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=20', '--enable-reuse', "--collection-cache-size=256", '--debug', '--on-error=continue', @@ -994,7 +994,7 @@ class TestSubmit(unittest.TestCase): "arv": "http://arvados.org/cwl#", } expect_container['command'] = ['arvados-cwl-runner', '--local', '--api=containers', - '--no-log-timestamps', '--disable-validate', + '--no-log-timestamps', '--disable-validate', '--disable-color', '--eval-timeout=20', '--thread-count=1', '--enable-reuse', "--collection-cache-size=512", '--debug', '--on-error=continue', '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'] @@ -1059,6 +1059,7 @@ class TestSubmit(unittest.TestCase): "--api=containers", "--no-log-timestamps", "--disable-validate", + "--disable-color", "--eval-timeout=20", '--thread-count=1', "--enable-reuse", diff --git a/sdk/dev-jobs.dockerfile b/sdk/dev-jobs.dockerfile index dd067e9778..f7719dbc43 100644 --- a/sdk/dev-jobs.dockerfile +++ b/sdk/dev-jobs.dockerfile @@ -14,12 +14,12 @@ # of the docker build root.) 
FROM debian:9 -MAINTAINER Ward Vandewege +MAINTAINER Peter Amstutz ENV DEBIAN_FRONTEND noninteractive -ARG pythoncmd=python -ARG pipcmd=pip +ARG pythoncmd=python3 +ARG pipcmd=pip3 RUN apt-get update -q && apt-get install -qy --no-install-recommends \ git ${pythoncmd}-pip ${pythoncmd}-virtualenv ${pythoncmd}-dev libcurl4-gnutls-dev \ diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go index 363d09dafb..394e30a737 100644 --- a/sdk/go/arvados/config.go +++ b/sdk/go/arvados/config.go @@ -462,6 +462,7 @@ type CloudVMsConfig struct { TimeoutProbe Duration TimeoutShutdown Duration TimeoutSignal Duration + TimeoutStaleRunLock Duration TimeoutTERM Duration ResourceTags map[string]string TagKeyPrefix string diff --git a/sdk/go/auth/salt.go b/sdk/go/auth/salt.go index 667a30f5ef..2140215986 100644 --- a/sdk/go/auth/salt.go +++ b/sdk/go/auth/salt.go @@ -26,9 +26,8 @@ func SaltToken(token, remote string) (string, error) { if len(parts) < 3 || parts[0] != "v2" { if reObsoleteToken.MatchString(token) { return "", ErrObsoleteToken - } else { - return "", ErrTokenFormat } + return "", ErrTokenFormat } uuid := parts[1] secret := parts[2] diff --git a/sdk/go/stats/duration.go b/sdk/go/stats/duration.go index cf91726334..facb71d212 100644 --- a/sdk/go/stats/duration.go +++ b/sdk/go/stats/duration.go @@ -29,7 +29,7 @@ func (d *Duration) UnmarshalJSON(data []byte) error { return d.Set(string(data)) } -// Value implements flag.Value +// Set implements flag.Value func (d *Duration) Set(s string) error { sec, err := strconv.ParseFloat(s, 64) if err == nil { diff --git a/sdk/python/arvados/commands/arv_copy.py b/sdk/python/arvados/commands/arv_copy.py index 5f12b62eeb..93fd6b598a 100755 --- a/sdk/python/arvados/commands/arv_copy.py +++ b/sdk/python/arvados/commands/arv_copy.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 -# arv-copy [--recursive] [--no-recursive] object-uuid src dst +# arv-copy [--recursive] [--no-recursive] object-uuid # # Copies an object from Arvados instance src to instance dst. # @@ -34,6 +34,7 @@ import sys import logging import tempfile import urllib.parse +import io import arvados import arvados.config @@ -87,17 +88,17 @@ def main(): '-f', '--force', dest='force', action='store_true', help='Perform copy even if the object appears to exist at the remote destination.') copy_opts.add_argument( - '--src', dest='source_arvados', required=True, + '--src', dest='source_arvados', help='The name of the source Arvados instance (required) - points at an Arvados config file. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf.') copy_opts.add_argument( - '--dst', dest='destination_arvados', required=True, + '--dst', dest='destination_arvados', help='The name of the destination Arvados instance (required) - points at an Arvados config file. May be either a pathname to a config file, or (for example) "foo" as shorthand for $HOME/.config/arvados/foo.conf.') copy_opts.add_argument( '--recursive', dest='recursive', action='store_true', - help='Recursively copy any dependencies for this object. (default)') + help='Recursively copy any dependencies for this object, and subprojects. (default)') copy_opts.add_argument( '--no-recursive', dest='recursive', action='store_false', - help='Do not copy any dependencies. 
NOTE: if this option is given, the copied object will need to be updated manually in order to be functional.') + help='Do not copy any dependencies or subprojects.') copy_opts.add_argument( '--project-uuid', dest='project_uuid', help='The UUID of the project at the destination to which the collection or workflow should be copied.') @@ -118,6 +119,9 @@ def main(): else: logger.setLevel(logging.INFO) + if not args.source_arvados: + args.source_arvados = args.object_uuid[:5] + # Create API clients for the source and destination instances src_arv = api_for_instance(args.source_arvados) dst_arv = api_for_instance(args.destination_arvados) @@ -135,6 +139,9 @@ def main(): elif t == 'Workflow': set_src_owner_uuid(src_arv.workflows(), args.object_uuid, args) result = copy_workflow(args.object_uuid, src_arv, dst_arv, args) + elif t == 'Group': + set_src_owner_uuid(src_arv.groups(), args.object_uuid, args) + result = copy_project(args.object_uuid, src_arv, dst_arv, args.project_uuid, args) else: abort("cannot copy object {} of type {}".format(args.object_uuid, t)) @@ -170,6 +177,10 @@ def set_src_owner_uuid(resource, uuid, args): # $HOME/.config/arvados/instance_name.conf # def api_for_instance(instance_name): + if not instance_name: + # Use environment + return arvados.api('v1', model=OrderedJsonModel()) + if '/' in instance_name: config_file = instance_name else: @@ -296,7 +307,14 @@ def copy_workflow(wf_uuid, src, dst, args): # copy the workflow itself del wf['uuid'] wf['owner_uuid'] = args.project_uuid - return dst.workflows().create(body=wf).execute(num_retries=args.retries) + + existing = dst.workflows().list(filters=[["owner_uuid", "=", args.project_uuid], + ["name", "=", wf["name"]]]).execute() + if len(existing["items"]) == 0: + return dst.workflows().create(body=wf).execute(num_retries=args.retries) + else: + return dst.workflows().update(uuid=existing["items"][0]["uuid"], body=wf).execute(num_retries=args.retries) + def workflow_collections(obj, locations, docker_images): if isinstance(obj, dict): @@ -305,7 +323,7 @@ def workflow_collections(obj, locations, docker_images): if loc.startswith("keep:"): locations.append(loc[5:]) - docker_image = obj.get('dockerImageId', None) or obj.get('dockerPull', None) + docker_image = obj.get('dockerImageId', None) or obj.get('dockerPull', None) or obj.get('acrContainerImage', None) if docker_image is not None: ds = docker_image.split(":", 1) tag = ds[1] if len(ds)==2 else 'latest' @@ -516,7 +534,7 @@ def copy_collection(obj_uuid, src, dst, args): # a new manifest as we go. src_keep = arvados.keep.KeepClient(api_client=src, num_retries=args.retries) dst_keep = arvados.keep.KeepClient(api_client=dst, num_retries=args.retries) - dst_manifest = "" + dst_manifest = io.StringIO() dst_locators = {} bytes_written = 0 bytes_expected = total_collection_size(manifest) @@ -527,14 +545,15 @@ def copy_collection(obj_uuid, src, dst, args): for line in manifest.splitlines(): words = line.split() - dst_manifest += words[0] + dst_manifest.write(words[0]) for word in words[1:]: try: loc = arvados.KeepLocator(word) except ValueError: # If 'word' can't be parsed as a locator, # presume it's a filename. 
- dst_manifest += ' ' + word + dst_manifest.write(' ') + dst_manifest.write(word) continue blockhash = loc.md5sum # copy this block if we haven't seen it before @@ -547,17 +566,18 @@ def copy_collection(obj_uuid, src, dst, args): dst_locator = dst_keep.put(data) dst_locators[blockhash] = dst_locator bytes_written += loc.size - dst_manifest += ' ' + dst_locators[blockhash] - dst_manifest += "\n" + dst_manifest.write(' ') + dst_manifest.write(dst_locators[blockhash]) + dst_manifest.write("\n") if progress_writer: progress_writer.report(obj_uuid, bytes_written, bytes_expected) progress_writer.finish() # Copy the manifest and save the collection. - logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest) + logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest.getvalue()) - c['manifest_text'] = dst_manifest + c['manifest_text'] = dst_manifest.getvalue() return create_collection_from(c, src, dst, args) def select_git_url(api, repo_name, retries, allow_insecure_http, allow_insecure_http_opt): @@ -632,6 +652,42 @@ def copy_docker_image(docker_image, docker_image_tag, src, dst, args): else: logger.warning('Could not find docker image {}:{}'.format(docker_image, docker_image_tag)) +def copy_project(obj_uuid, src, dst, owner_uuid, args): + + src_project_record = src.groups().get(uuid=obj_uuid).execute(num_retries=args.retries) + + # Create/update the destination project + existing = dst.groups().list(filters=[["owner_uuid", "=", owner_uuid], + ["name", "=", src_project_record["name"]]]).execute(num_retries=args.retries) + if len(existing["items"]) == 0: + project_record = dst.groups().create(body={"group": {"group_class": "project", + "owner_uuid": owner_uuid, + "name": src_project_record["name"]}}).execute(num_retries=args.retries) + else: + project_record = existing["items"][0] + + dst.groups().update(uuid=project_record["uuid"], + body={"group": { + "description": src_project_record["description"]}}).execute(num_retries=args.retries) + + args.project_uuid = project_record["uuid"] + + logger.debug('Copying %s to %s', obj_uuid, project_record["uuid"]) + + # Copy collections + copy_collections([col["uuid"] for col in arvados.util.list_all(src.collections().list, filters=[["owner_uuid", "=", obj_uuid]])], + src, dst, args) + + # Copy workflows + for w in arvados.util.list_all(src.workflows().list, filters=[["owner_uuid", "=", obj_uuid]]): + copy_workflow(w["uuid"], src, dst, args) + + if args.recursive: + for g in arvados.util.list_all(src.groups().list, filters=[["owner_uuid", "=", obj_uuid]]): + copy_project(g["uuid"], src, dst, project_record["uuid"], args) + + return project_record + # git_rev_parse(rev, repo) # # Returns the 40-character commit hash corresponding to 'rev' in @@ -654,7 +710,7 @@ def git_rev_parse(rev, repo): # Special case: if handed a Keep locator hash, return 'Collection'. 
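In the hunk just below, uuid_type's hand-rolled expression is replaced by the shared arvados.util.keep_locator_pattern. For reference, the expression being removed matches 32 hex digits, a +size suffix, and optional +hint segments; a quick demonstration (Go regexp, pattern copied verbatim from the removed line):

    package main

    import (
        "fmt"
        "regexp"
    )

    func main() {
        // Pattern copied from the removed line in uuid_type below.
        loc := regexp.MustCompile(`^[a-f0-9]{32}\+[0-9]+(\+[A-Za-z0-9+-]+)?$`)
        fmt.Println(loc.MatchString("d41d8cd98f00b204e9800998ecf8427e+0")) // true: a Keep locator
        fmt.Println(loc.MatchString("zzzzz-4zz18-znfnqtbbv4spc3w"))        // false: a collection UUID
    }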
# def uuid_type(api, object_uuid): - if re.match(r'^[a-f0-9]{32}\+[0-9]+(\+[A-Za-z0-9+-]+)?$', object_uuid): + if re.match(arvados.util.keep_locator_pattern, object_uuid): return 'Collection' p = object_uuid.split('-') if len(p) == 3: diff --git a/sdk/python/arvados/commands/run.py b/sdk/python/arvados/commands/run.py index a45775470a..1e64eeb1da 100644 --- a/sdk/python/arvados/commands/run.py +++ b/sdk/python/arvados/commands/run.py @@ -236,7 +236,7 @@ def uploadfiles(files, api, dry_run=False, num_retries=0, # empty collection pdh = collection.portable_data_hash() assert (pdh == config.EMPTY_BLOCK_LOCATOR), "Empty collection portable_data_hash did not have expected locator, was %s" % pdh - logger.info("Using empty collection %s", pdh) + logger.debug("Using empty collection %s", pdh) for c in files: c.keepref = "%s/%s" % (pdh, c.fn) diff --git a/sdk/python/tests/run_test_server.py b/sdk/python/tests/run_test_server.py index f4c1230cc8..0cb4151ac3 100644 --- a/sdk/python/tests/run_test_server.py +++ b/sdk/python/tests/run_test_server.py @@ -43,6 +43,14 @@ import arvados.config ARVADOS_DIR = os.path.realpath(os.path.join(MY_DIRNAME, '../../..')) SERVICES_SRC_DIR = os.path.join(ARVADOS_DIR, 'services') + +# Work around https://bugs.python.org/issue27805, should be no longer +# necessary from sometime in Python 3.8.x +if not os.environ.get('ARVADOS_DEBUG', ''): + WRITE_MODE = 'a' +else: + WRITE_MODE = 'w' + if 'GOPATH' in os.environ: # Add all GOPATH bin dirs to PATH -- but insert them after the # ruby gems bin dir, to ensure "bundle" runs the Ruby bundler @@ -327,7 +335,7 @@ def run(leave_running_atexit=False): env.pop('ARVADOS_API_HOST', None) env.pop('ARVADOS_API_HOST_INSECURE', None) env.pop('ARVADOS_API_TOKEN', None) - logf = open(_logfilename('railsapi'), 'a') + logf = open(_logfilename('railsapi'), WRITE_MODE) railsapi = subprocess.Popen( ['bundle', 'exec', 'passenger', 'start', '-p{}'.format(port), @@ -409,7 +417,7 @@ def run_controller(): if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ: return stop_controller() - logf = open(_logfilename('controller'), 'a') + logf = open(_logfilename('controller'), WRITE_MODE) port = internal_port_from_config("Controller") controller = subprocess.Popen( ["arvados-server", "controller"], @@ -429,7 +437,7 @@ def run_ws(): return stop_ws() port = internal_port_from_config("Websocket") - logf = open(_logfilename('ws'), 'a') + logf = open(_logfilename('ws'), WRITE_MODE) ws = subprocess.Popen( ["arvados-server", "ws"], stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True) @@ -462,7 +470,7 @@ def _start_keep(n, blob_signing=False): yaml.safe_dump(confdata, f) keep_cmd = ["keepstore", "-config", conf] - with open(_logfilename('keep{}'.format(n)), 'a') as logf: + with open(_logfilename('keep{}'.format(n)), WRITE_MODE) as logf: with open('/dev/null') as _stdin: child = subprocess.Popen( keep_cmd, stdin=_stdin, stdout=logf, stderr=logf, close_fds=True) @@ -529,7 +537,7 @@ def run_keep_proxy(): port = internal_port_from_config("Keepproxy") env = os.environ.copy() env['ARVADOS_API_TOKEN'] = auth_token('anonymous') - logf = open(_logfilename('keepproxy'), 'a') + logf = open(_logfilename('keepproxy'), WRITE_MODE) kp = subprocess.Popen( ['keepproxy'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True) @@ -568,7 +576,7 @@ def run_arv_git_httpd(): gitport = internal_port_from_config("GitHTTP") env = os.environ.copy() env.pop('ARVADOS_API_TOKEN', None) - logf = open(_logfilename('arv-git-httpd'), 'a') + logf = 
open(_logfilename('arv-git-httpd'), WRITE_MODE) agh = subprocess.Popen(['arv-git-httpd'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf) with open(_pidfile('arv-git-httpd'), 'w') as f: @@ -587,7 +595,7 @@ def run_keep_web(): keepwebport = internal_port_from_config("WebDAV") env = os.environ.copy() - logf = open(_logfilename('keep-web'), 'a') + logf = open(_logfilename('keep-web'), WRITE_MODE) keepweb = subprocess.Popen( ['keep-web'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf) diff --git a/sdk/python/tests/test_arv_copy.py b/sdk/python/tests/test_arv_copy.py index 324d6e05d7..452c2beba2 100644 --- a/sdk/python/tests/test_arv_copy.py +++ b/sdk/python/tests/test_arv_copy.py @@ -7,11 +7,18 @@ import os import sys import tempfile import unittest +import shutil +import arvados.api +from arvados.collection import Collection, CollectionReader import arvados.commands.arv_copy as arv_copy from . import arvados_testutil as tutil +from . import run_test_server + +class ArvCopyVersionTestCase(run_test_server.TestCaseWithServers, tutil.VersionChecker): + MAIN_SERVER = {} + KEEP_SERVER = {} -class ArvCopyTestCase(unittest.TestCase, tutil.VersionChecker): def run_copy(self, args): sys.argv = ['arv-copy'] + args return arv_copy.main() @@ -26,3 +33,50 @@ class ArvCopyTestCase(unittest.TestCase, tutil.VersionChecker): with self.assertRaises(SystemExit): self.run_copy(['--version']) self.assertVersionOutput(out, err) + + def test_copy_project(self): + api = arvados.api() + src_proj = api.groups().create(body={"group": {"name": "arv-copy project", "group_class": "project"}}).execute()["uuid"] + + c = Collection() + with c.open('foo', 'wt') as f: + f.write('foo') + c.save_new("arv-copy foo collection", owner_uuid=src_proj) + + dest_proj = api.groups().create(body={"group": {"name": "arv-copy dest project", "group_class": "project"}}).execute()["uuid"] + + tmphome = tempfile.mkdtemp() + home_was = os.environ['HOME'] + os.environ['HOME'] = tmphome + try: + cfgdir = os.path.join(tmphome, ".config", "arvados") + os.makedirs(cfgdir) + with open(os.path.join(cfgdir, "zzzzz.conf"), "wt") as f: + f.write("ARVADOS_API_HOST=%s\n" % os.environ["ARVADOS_API_HOST"]) + f.write("ARVADOS_API_TOKEN=%s\n" % os.environ["ARVADOS_API_TOKEN"]) + f.write("ARVADOS_API_HOST_INSECURE=1\n") + + contents = api.groups().list(filters=[["owner_uuid", "=", dest_proj]]).execute() + assert len(contents["items"]) == 0 + + try: + self.run_copy(["--project-uuid", dest_proj, src_proj]) + except SystemExit as e: + assert e.code == 0 + + contents = api.groups().list(filters=[["owner_uuid", "=", dest_proj]]).execute() + assert len(contents["items"]) == 1 + + assert contents["items"][0]["name"] == "arv-copy project" + copied_project = contents["items"][0]["uuid"] + + contents = api.collections().list(filters=[["owner_uuid", "=", copied_project]]).execute() + assert len(contents["items"]) == 1 + + assert contents["items"][0]["uuid"] != c.manifest_locator() + assert contents["items"][0]["name"] == "arv-copy foo collection" + assert contents["items"][0]["portable_data_hash"] == c.portable_data_hash() + + finally: + os.environ['HOME'] = home_was + shutil.rmtree(tmphome) diff --git a/services/api/app/controllers/arvados/v1/collections_controller.rb b/services/api/app/controllers/arvados/v1/collections_controller.rb index 81b9ca9e5b..656bd37ae6 100644 --- a/services/api/app/controllers/arvados/v1/collections_controller.rb +++ b/services/api/app/controllers/arvados/v1/collections_controller.rb @@ -56,11 +56,19 @@ class 
Arvados::V1::CollectionsController < ApplicationController end def find_object_by_uuid - @include_old_versions = true + if params[:include_old_versions].nil? + @include_old_versions = true + else + @include_old_versions = params[:include_old_versions] + end if loc = Keep::Locator.parse(params[:id]) loc.strip_hints! + opts = {} + opts.update({include_trash: true}) if params[:include_trash] + opts.update({include_old_versions: @include_old_versions}) + # It matters which Collection object we pick because we use it to get signed_manifest_text, # the value of which is affected by the value of trash_at. # @@ -72,14 +80,13 @@ class Arvados::V1::CollectionsController < ApplicationController # it will select the Collection object with the longest # available lifetime. - if c = Collection.readable_by(*@read_users).where({ portable_data_hash: loc.to_s }).order("trash_at desc").limit(1).first + if c = Collection.readable_by(*@read_users, opts).where({ portable_data_hash: loc.to_s }).order("trash_at desc").limit(1).first @object = { uuid: c.portable_data_hash, portable_data_hash: c.portable_data_hash, manifest_text: c.signed_manifest_text, } end - true else super end diff --git a/services/api/app/controllers/arvados/v1/users_controller.rb b/services/api/app/controllers/arvados/v1/users_controller.rb index 867b9a6e6a..cd23706d08 100644 --- a/services/api/app/controllers/arvados/v1/users_controller.rb +++ b/services/api/app/controllers/arvados/v1/users_controller.rb @@ -22,7 +22,15 @@ class Arvados::V1::UsersController < ApplicationController rescue ActiveRecord::RecordNotUnique retry end - u.update_attributes!(nullify_attrs(attrs)) + needupdate = {} + nullify_attrs(attrs).each do |k,v| + if !v.nil? && u.send(k) != v + needupdate[k] = v + end + end + if needupdate.length > 0 + u.update_attributes!(needupdate) + end @objects << u end @offset = 0 diff --git a/services/api/app/models/arvados_model.rb b/services/api/app/models/arvados_model.rb index 07bedebfaa..3966b7c393 100644 --- a/services/api/app/models/arvados_model.rb +++ b/services/api/app/models/arvados_model.rb @@ -454,7 +454,7 @@ class ArvadosModel < ApplicationRecord end def logged_attributes - attributes.except(*Rails.configuration.AuditLogs.UnloggedAttributes.keys) + attributes.except(*Rails.configuration.AuditLogs.UnloggedAttributes.stringify_keys.keys) end def self.full_text_searchable_columns diff --git a/services/api/lib/config_loader.rb b/services/api/lib/config_loader.rb index cf16993ca5..f421fb5b2a 100644 --- a/services/api/lib/config_loader.rb +++ b/services/api/lib/config_loader.rb @@ -147,14 +147,14 @@ class ConfigLoader 'Ki' => 1 << 10, 'M' => 1000000, 'Mi' => 1 << 20, - "G" => 1000000000, - "Gi" => 1 << 30, - "T" => 1000000000000, - "Ti" => 1 << 40, - "P" => 1000000000000000, - "Pi" => 1 << 50, - "E" => 1000000000000000000, - "Ei" => 1 << 60, + "G" => 1000000000, + "Gi" => 1 << 30, + "T" => 1000000000000, + "Ti" => 1 << 40, + "P" => 1000000000000000, + "Pi" => 1 << 50, + "E" => 1000000000000000000, + "Ei" => 1 << 60, }[mt[2]] end end diff --git a/services/api/lib/enable_jobs_api.rb b/services/api/lib/enable_jobs_api.rb index 1a96a81ad6..cef76f08a5 100644 --- a/services/api/lib/enable_jobs_api.rb +++ b/services/api/lib/enable_jobs_api.rb @@ -2,16 +2,19 @@ # # SPDX-License-Identifier: AGPL-3.0 -Disable_update_jobs_api_method_list = {"jobs.create"=>{}, - "pipeline_instances.create"=>{}, - "pipeline_templates.create"=>{}, - "jobs.update"=>{}, - "pipeline_instances.update"=>{}, - "pipeline_templates.update"=>{}, - 
"job_tasks.create"=>{}, - "job_tasks.update"=>{}} +Disable_update_jobs_api_method_list = ConfigLoader.to_OrderedOptions({ + "jobs.create"=>{}, + "pipeline_instances.create"=>{}, + "pipeline_templates.create"=>{}, + "jobs.update"=>{}, + "pipeline_instances.update"=>{}, + "pipeline_templates.update"=>{}, + "job_tasks.create"=>{}, + "job_tasks.update"=>{} + }) -Disable_jobs_api_method_list = {"jobs.create"=>{}, +Disable_jobs_api_method_list = ConfigLoader.to_OrderedOptions({ + "jobs.create"=>{}, "pipeline_instances.create"=>{}, "pipeline_templates.create"=>{}, "jobs.get"=>{}, @@ -36,7 +39,7 @@ Disable_jobs_api_method_list = {"jobs.create"=>{}, "jobs.show"=>{}, "pipeline_instances.show"=>{}, "pipeline_templates.show"=>{}, - "job_tasks.show"=>{}} + "job_tasks.show"=>{}}) def check_enable_legacy_jobs_api # Create/update is permanently disabled (legacy functionality has been removed) diff --git a/services/api/test/fixtures/logs.yml b/services/api/test/fixtures/logs.yml index 0785c12a50..25f1efff62 100644 --- a/services/api/test/fixtures/logs.yml +++ b/services/api/test/fixtures/logs.yml @@ -4,51 +4,56 @@ noop: # nothing happened ...to the 'spectator' user id: 1 - uuid: zzzzz-xxxxx-pshmckwoma9plh7 + uuid: zzzzz-57u5n-pshmckwoma9plh7 owner_uuid: zzzzz-tpzed-000000000000000 object_uuid: zzzzz-tpzed-l1s2piq4t4mps8r object_owner_uuid: zzzzz-tpzed-000000000000000 event_at: <%= 1.minute.ago.to_s(:db) %> + created_at: <%= 1.minute.ago.to_s(:db) %> admin_changes_repository2: # admin changes repository2, which is owned by active user id: 2 - uuid: zzzzz-xxxxx-pshmckwoma00002 + uuid: zzzzz-57u5n-pshmckwoma00002 owner_uuid: zzzzz-tpzed-d9tiejq69daie8f # admin user object_uuid: zzzzz-2x53u-382brsig8rp3667 # repository foo object_owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user + created_at: <%= 2.minute.ago.to_s(:db) %> event_at: <%= 2.minute.ago.to_s(:db) %> event_type: update admin_changes_specimen: # admin changes specimen owned_by_spectator id: 3 - uuid: zzzzz-xxxxx-pshmckwoma00003 + uuid: zzzzz-57u5n-pshmckwoma00003 owner_uuid: zzzzz-tpzed-d9tiejq69daie8f # admin user object_uuid: zzzzz-2x53u-3b0xxwzlbzxq5yr # specimen owned_by_spectator object_owner_uuid: zzzzz-tpzed-l1s2piq4t4mps8r # spectator user + created_at: <%= 3.minute.ago.to_s(:db) %> event_at: <%= 3.minute.ago.to_s(:db) %> event_type: update system_adds_foo_file: # foo collection added, readable by active through link id: 4 - uuid: zzzzz-xxxxx-pshmckwoma00004 + uuid: zzzzz-57u5n-pshmckwoma00004 owner_uuid: zzzzz-tpzed-000000000000000 # system user object_uuid: zzzzz-4zz18-znfnqtbbv4spc3w # foo file object_owner_uuid: zzzzz-tpzed-000000000000000 # system user + created_at: <%= 4.minute.ago.to_s(:db) %> event_at: <%= 4.minute.ago.to_s(:db) %> event_type: create system_adds_baz: # baz collection added, readable by active and spectator through group 'all users' group membership id: 5 - uuid: zzzzz-xxxxx-pshmckwoma00005 + uuid: zzzzz-57u5n-pshmckwoma00005 owner_uuid: zzzzz-tpzed-000000000000000 # system user object_uuid: zzzzz-4zz18-y9vne9npefyxh8g # baz file object_owner_uuid: zzzzz-tpzed-000000000000000 # system user + created_at: <%= 5.minute.ago.to_s(:db) %> event_at: <%= 5.minute.ago.to_s(:db) %> event_type: create log_owned_by_active: id: 6 - uuid: zzzzz-xxxxx-pshmckwoma12345 + uuid: zzzzz-57u5n-pshmckwoma12345 owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user object_uuid: zzzzz-2x53u-382brsig8rp3667 # repository foo object_owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user diff --git 
a/services/api/test/functional/arvados/v1/collections_controller_test.rb b/services/api/test/functional/arvados/v1/collections_controller_test.rb index d8017881d5..c025394bc1 100644 --- a/services/api/test/functional/arvados/v1/collections_controller_test.rb +++ b/services/api/test/functional/arvados/v1/collections_controller_test.rb @@ -1388,6 +1388,16 @@ EOS json_response['name'] end + test 'can get old version collection by PDH' do + authorize_with :active + get :show, params: { + id: collections(:collection_owned_by_active_past_version_1).portable_data_hash, + } + assert_response :success + assert_equal collections(:collection_owned_by_active_past_version_1).portable_data_hash, + json_response['portable_data_hash'] + end + test 'version and current_version_uuid are ignored at creation time' do permit_unsigned_manifests authorize_with :active diff --git a/services/api/test/functional/arvados/v1/groups_controller_test.rb b/services/api/test/functional/arvados/v1/groups_controller_test.rb index ff89cd2129..f413188b54 100644 --- a/services/api/test/functional/arvados/v1/groups_controller_test.rb +++ b/services/api/test/functional/arvados/v1/groups_controller_test.rb @@ -430,7 +430,8 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase end test 'get contents with jobs and pipeline instances disabled' do - Rails.configuration.API.DisabledAPIs = {'jobs.index'=>{}, 'pipeline_instances.index'=>{}} + Rails.configuration.API.DisabledAPIs = ConfigLoader.to_OrderedOptions( + {'jobs.index'=>{}, 'pipeline_instances.index'=>{}}) authorize_with :active get :contents, params: { diff --git a/services/api/test/functional/arvados/v1/schema_controller_test.rb b/services/api/test/functional/arvados/v1/schema_controller_test.rb index 3dd343b13c..764f3a8d1d 100644 --- a/services/api/test/functional/arvados/v1/schema_controller_test.rb +++ b/services/api/test/functional/arvados/v1/schema_controller_test.rb @@ -65,8 +65,8 @@ class Arvados::V1::SchemaControllerTest < ActionController::TestCase end test "non-empty disable_api_methods" do - Rails.configuration.API.DisabledAPIs = - {'jobs.create'=>{}, 'pipeline_instances.create'=>{}, 'pipeline_templates.create'=>{}} + Rails.configuration.API.DisabledAPIs = ConfigLoader.to_OrderedOptions( + {'jobs.create'=>{}, 'pipeline_instances.create'=>{}, 'pipeline_templates.create'=>{}}) get :index assert_response :success discovery_doc = JSON.parse(@response.body) diff --git a/services/api/test/functional/arvados/v1/users_controller_test.rb b/services/api/test/functional/arvados/v1/users_controller_test.rb index 0ce9f1137f..ea5d5b1436 100644 --- a/services/api/test/functional/arvados/v1/users_controller_test.rb +++ b/services/api/test/functional/arvados/v1/users_controller_test.rb @@ -1039,9 +1039,12 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase test "batch update" do existinguuid = 'remot-tpzed-foobarbazwazqux' newuuid = 'remot-tpzed-newnarnazwazqux' + unchanginguuid = 'remot-tpzed-nochangingattrs' act_as_system_user do User.create!(uuid: existinguuid, email: 'root@existing.example.com') + User.create!(uuid: unchanginguuid, email: 'root@unchanging.example.com', prefs: {'foo' => {'bar' => 'baz'}}) end + assert_equal(1, Log.where(object_uuid: unchanginguuid).count) authorize_with(:admin) patch(:batch_update, @@ -1059,6 +1062,10 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase 'email' => 'root@remot.example.com', 'username' => '', }, + unchanginguuid => { + 'email' => 'root@unchanging.example.com', + 'prefs' => 
{'foo' => {'bar' => 'baz'}}, + }, }}) assert_response(:success) @@ -1070,6 +1077,8 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase assert_equal('noot', User.find_by_uuid(newuuid).first_name) assert_equal('root@remot.example.com', User.find_by_uuid(newuuid).email) + + assert_equal(1, Log.where(object_uuid: unchanginguuid).count) end NON_ADMIN_USER_DATA = ["uuid", "kind", "is_active", "email", "first_name", diff --git a/services/api/test/functional/user_sessions_controller_test.rb b/services/api/test/functional/user_sessions_controller_test.rb index cd475dea4d..d979208d38 100644 --- a/services/api/test/functional/user_sessions_controller_test.rb +++ b/services/api/test/functional/user_sessions_controller_test.rb @@ -68,7 +68,7 @@ class UserSessionsControllerTest < ActionController::TestCase test "login to LoginCluster" do Rails.configuration.Login.LoginCluster = 'zbbbb' - Rails.configuration.RemoteClusters['zbbbb'] = {'Host' => 'zbbbb.example.com'} + Rails.configuration.RemoteClusters['zbbbb'] = ConfigLoader.to_OrderedOptions({'Host' => 'zbbbb.example.com'}) api_client_page = 'http://client.example.com/home' get :login, params: {return_to: api_client_page} assert_response :redirect diff --git a/services/api/test/test_helper.rb b/services/api/test/test_helper.rb index c99a57aaff..5dc77cb98a 100644 --- a/services/api/test/test_helper.rb +++ b/services/api/test/test_helper.rb @@ -62,7 +62,7 @@ class ActiveSupport::TestCase include ArvadosTestSupport include CurrentApiClient - teardown do + setup do Thread.current[:api_client_ip_address] = nil Thread.current[:api_client_authorization] = nil Thread.current[:api_client_uuid] = nil @@ -72,6 +72,14 @@ class ActiveSupport::TestCase restore_configuration end + teardown do + # Confirm that any changed configuration doesn't include non-symbol keys + $arvados_config.keys.each do |conf_name| + conf = Rails.configuration.send(conf_name) + confirm_keys_as_symbols(conf, conf_name) if conf.respond_to?('keys') + end + end + def assert_equal(expect, *args) if expect.nil? 
assert_nil(*args) @@ -99,6 +107,14 @@ class ActiveSupport::TestCase end end + def confirm_keys_as_symbols(conf, conf_name) + assert(conf.is_a?(ActiveSupport::OrderedOptions), "#{conf_name} should be an OrderedOptions object") + conf.keys.each do |k| + assert(k.is_a?(Symbol), "Key '#{k}' on section '#{conf_name}' should be a Symbol") + confirm_keys_as_symbols(conf[k], "#{conf_name}.#{k}") if conf[k].respond_to?('keys') + end + end + def restore_configuration # Restore configuration settings changed during tests ConfigLoader.copy_into_config $arvados_config, Rails.configuration diff --git a/services/api/test/unit/application_test.rb b/services/api/test/unit/application_test.rb index 679dddf223..e1565ec627 100644 --- a/services/api/test/unit/application_test.rb +++ b/services/api/test/unit/application_test.rb @@ -7,7 +7,7 @@ require 'test_helper' class ApplicationTest < ActiveSupport::TestCase include CurrentApiClient - test "test act_as_system_user" do + test "act_as_system_user" do Thread.current[:user] = users(:active) assert_equal users(:active), Thread.current[:user] act_as_system_user do @@ -17,7 +17,7 @@ class ApplicationTest < ActiveSupport::TestCase assert_equal users(:active), Thread.current[:user] end - test "test act_as_system_user is exception safe" do + test "act_as_system_user is exception safe" do Thread.current[:user] = users(:active) assert_equal users(:active), Thread.current[:user] caught = false @@ -33,4 +33,12 @@ class ApplicationTest < ActiveSupport::TestCase assert caught assert_equal users(:active), Thread.current[:user] end + + test "config maps' keys are returned as symbols" do + assert Rails.configuration.Users.AutoSetupUsernameBlacklist.is_a? ActiveSupport::OrderedOptions + assert Rails.configuration.Users.AutoSetupUsernameBlacklist.keys.size > 0 + Rails.configuration.Users.AutoSetupUsernameBlacklist.keys.each do |k| + assert k.is_a? Symbol + end + end end diff --git a/services/api/test/unit/collection_test.rb b/services/api/test/unit/collection_test.rb index addea83062..48cae5afee 100644 --- a/services/api/test/unit/collection_test.rb +++ b/services/api/test/unit/collection_test.rb @@ -1044,10 +1044,10 @@ class CollectionTest < ActiveSupport::TestCase end test "create collections with managed properties" do - Rails.configuration.Collections.ManagedProperties = { + Rails.configuration.Collections.ManagedProperties = ConfigLoader.to_OrderedOptions({ 'default_prop1' => {'Value' => 'prop1_value'}, 'responsible_person_uuid' => {'Function' => 'original_owner'} - } + }) # Test collection without initial properties act_as_user users(:active) do c = create_collection 'foo', Encoding::US_ASCII @@ -1076,9 +1076,9 @@ class CollectionTest < ActiveSupport::TestCase end test "update collection with protected managed properties" do - Rails.configuration.Collections.ManagedProperties = { + Rails.configuration.Collections.ManagedProperties = ConfigLoader.to_OrderedOptions({ 'default_prop1' => {'Value' => 'prop1_value', 'Protected' => true}, - } + }) act_as_user users(:active) do c = create_collection 'foo', Encoding::US_ASCII assert c.valid? 
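A note on the recurring pattern above and below: test code that previously assigned bare Hash literals to Rails.configuration sections now wraps them in ConfigLoader.to_OrderedOptions(...), so that configuration injected by tests has the same shape as configuration produced by the real loader, namely nested ActiveSupport::OrderedOptions objects whose keys are Symbols. The new teardown hook and the confirm_keys_as_symbols helper in test_helper.rb enforce this recursively. The implementation of ConfigLoader.to_OrderedOptions is not part of this diff; a minimal sketch of the conversion it needs to perform (a hypothetical reconstruction, not the shipped code) could look like:

    require 'active_support/ordered_options'

    # Recursively convert a plain Hash into ActiveSupport::OrderedOptions.
    # OrderedOptions symbolizes keys on assignment, which gives dot-style
    # access (config.AuditLogs.UnloggedAttributes) and makes the Symbol-key
    # assertions in confirm_keys_as_symbols pass.
    def to_ordered_options(value)
      return value unless value.is_a?(Hash)
      value.each_with_object(ActiveSupport::OrderedOptions.new) do |(k, v), opts|
        opts[k] = to_ordered_options(v)
      end
    end

This is also why logged_attributes in arvados_model.rb now calls stringify_keys before using UnloggedAttributes keys against attribute names, and why the user_test.rb assertions below do the same to compare notification addresses: the configured keys are Symbols, while attribute names and e-mail addresses are Strings.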
diff --git a/services/api/test/unit/container_request_test.rb b/services/api/test/unit/container_request_test.rb index b91910d2d6..90de800b2f 100644 --- a/services/api/test/unit/container_request_test.rb +++ b/services/api/test/unit/container_request_test.rb @@ -576,7 +576,7 @@ class ContainerRequestTest < ActiveSupport::TestCase test "Container.resolve_container_image(pdh)" do set_user_from_auth :active [[:docker_image, 'v1'], [:docker_image_1_12, 'v2']].each do |coll, ver| - Rails.configuration.Containers.SupportedDockerImageFormats = {ver=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({ver=>{}}) pdh = collections(coll).portable_data_hash resolved = Container.resolve_container_image(pdh) assert_equal resolved, pdh @@ -602,7 +602,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "migrated docker image" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v2'=>{}}) add_docker19_migration_link # Test that it returns only v2 images even though request is for v1 image. @@ -620,7 +620,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "use unmigrated docker image" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v1'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v1'=>{}}) add_docker19_migration_link # Test that it returns only supported v1 images even though there is a @@ -639,7 +639,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "incompatible docker image v1" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v1'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v1'=>{}}) add_docker19_migration_link # Don't return unsupported v2 image even if we ask for it directly. @@ -652,7 +652,7 @@ class ContainerRequestTest < ActiveSupport::TestCase end test "incompatible docker image v2" do - Rails.configuration.Containers.SupportedDockerImageFormats = {'v2'=>{}} + Rails.configuration.Containers.SupportedDockerImageFormats = ConfigLoader.to_OrderedOptions({'v2'=>{}}) # No migration link, don't return unsupported v1 image, set_user_from_auth :active diff --git a/services/api/test/unit/job_test.rb b/services/api/test/unit/job_test.rb index 0e8cc48538..c529aab8b6 100644 --- a/services/api/test/unit/job_test.rb +++ b/services/api/test/unit/job_test.rb @@ -117,7 +117,7 @@ class JobTest < ActiveSupport::TestCase 'locator' => BAD_COLLECTION, }.each_pair do |spec_type, image_spec| test "Job validation fails with nonexistent Docker image #{spec_type}" do - Rails.configuration.RemoteClusters = {} + Rails.configuration.RemoteClusters = ConfigLoader.to_OrderedOptions({}) job = Job.new job_attrs(runtime_constraints: {'docker_image' => image_spec}) assert(job.invalid?, "nonexistent Docker image #{spec_type} #{image_spec} was valid") diff --git a/services/api/test/unit/log_test.rb b/services/api/test/unit/log_test.rb index 016a0e4eb4..76d78f9eaa 100644 --- a/services/api/test/unit/log_test.rb +++ b/services/api/test/unit/log_test.rb @@ -282,7 +282,7 @@ class LogTest < ActiveSupport::TestCase end test "non-empty configuration.unlogged_attributes" do - Rails.configuration.AuditLogs.UnloggedAttributes = {"manifest_text"=>{}} + Rails.configuration.AuditLogs.UnloggedAttributes = ConfigLoader.to_OrderedOptions({"manifest_text"=>{}}) txt = ". 
acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n" act_as_system_user do @@ -297,7 +297,7 @@ class LogTest < ActiveSupport::TestCase end test "empty configuration.unlogged_attributes" do - Rails.configuration.AuditLogs.UnloggedAttributes = {} + Rails.configuration.AuditLogs.UnloggedAttributes = ConfigLoader.to_OrderedOptions({}) txt = ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n" act_as_system_user do @@ -319,6 +319,7 @@ class LogTest < ActiveSupport::TestCase def assert_no_logs_deleted logs_before = Log.unscoped.all.count + assert logs_before > 0 yield assert_equal logs_before, Log.unscoped.all.count end @@ -350,34 +351,34 @@ class LogTest < ActiveSupport::TestCase # but 3 minutes suits our test data better (and is test-worthy in # that it's expected to work correctly in production). test 'delete old audit logs with production settings' do - initial_log_count = Log.unscoped.all.count + initial_log_count = remaining_audit_logs.count + assert initial_log_count > 0 AuditLogs.delete_old(max_age: 180, max_batch: 100000) assert_operator remaining_audit_logs.count, :<, initial_log_count end test 'delete all audit logs in multiple batches' do + assert remaining_audit_logs.count > 2 AuditLogs.delete_old(max_age: 0.00001, max_batch: 2) assert_equal [], remaining_audit_logs.collect(&:uuid) end test 'delete old audit logs in thread' do - begin - Rails.configuration.AuditLogs.MaxAge = 20 - Rails.configuration.AuditLogs.MaxDeleteBatch = 100000 - Rails.cache.delete 'AuditLogs' - initial_log_count = Log.unscoped.all.count + 1 - act_as_system_user do - Log.create!() - initial_log_count += 1 - end - deadline = Time.now + 10 - while remaining_audit_logs.count == initial_log_count - if Time.now > deadline - raise "timed out" - end - sleep 0.1 + Rails.configuration.AuditLogs.MaxAge = 20 + Rails.configuration.AuditLogs.MaxDeleteBatch = 100000 + Rails.cache.delete 'AuditLogs' + initial_audit_log_count = remaining_audit_logs.count + assert initial_audit_log_count > 0 + act_as_system_user do + Log.create!() + end + deadline = Time.now + 10 + while remaining_audit_logs.count == initial_audit_log_count + if Time.now > deadline + raise "timed out" end - assert_operator remaining_audit_logs.count, :<, initial_log_count + sleep 0.1 end + assert_operator remaining_audit_logs.count, :<, initial_audit_log_count end end diff --git a/services/api/test/unit/user_test.rb b/services/api/test/unit/user_test.rb index 7fcd36d709..b6d66230db 100644 --- a/services/api/test/unit/user_test.rb +++ b/services/api/test/unit/user_test.rb @@ -110,7 +110,7 @@ class UserTest < ActiveSupport::TestCase end test "new username set avoiding blacklist" do - Rails.configuration.Users.AutoSetupUsernameBlacklist = {"root"=>{}} + Rails.configuration.Users.AutoSetupUsernameBlacklist = ConfigLoader.to_OrderedOptions({"root"=>{}}) check_new_username_setting("root", "root2") end @@ -340,48 +340,52 @@ class UserTest < ActiveSupport::TestCase assert_equal(user.first_name, 'first_name_for_newly_created_user_updated') end + active_notify_list = ConfigLoader.to_OrderedOptions({"active-notify@example.com"=>{}}) + inactive_notify_list = ConfigLoader.to_OrderedOptions({"inactive-notify@example.com"=>{}}) + empty_notify_list = ConfigLoader.to_OrderedOptions({}) + test "create new user with notifications" do set_user_from_auth :admin - create_user_and_verify_setup_and_notifications true, {'active-notify-address@example.com'=>{}}, {'inactive-notify-address@example.com'=>{}}, nil, nil - create_user_and_verify_setup_and_notifications true, 
{'active-notify-address@example.com'=>{}}, {}, nil, nil - create_user_and_verify_setup_and_notifications true, {}, [], nil, nil - create_user_and_verify_setup_and_notifications false, {'active-notify-address@example.com'=>{}}, {'inactive-notify-address@example.com'=>{}}, nil, nil - create_user_and_verify_setup_and_notifications false, {}, {'inactive-notify-address@example.com'=>{}}, nil, nil - create_user_and_verify_setup_and_notifications false, {}, {}, nil, nil + create_user_and_verify_setup_and_notifications true, active_notify_list, inactive_notify_list, nil, nil + create_user_and_verify_setup_and_notifications true, active_notify_list, empty_notify_list, nil, nil + create_user_and_verify_setup_and_notifications true, empty_notify_list, empty_notify_list, nil, nil + create_user_and_verify_setup_and_notifications false, active_notify_list, inactive_notify_list, nil, nil + create_user_and_verify_setup_and_notifications false, empty_notify_list, inactive_notify_list, nil, nil + create_user_and_verify_setup_and_notifications false, empty_notify_list, empty_notify_list, nil, nil end [ # Easy inactive user tests. - [false, {}, {}, "inactive-none@example.com", false, false, "inactivenone"], - [false, {}, {}, "inactive-vm@example.com", true, false, "inactivevm"], - [false, {}, {}, "inactive-repo@example.com", false, true, "inactiverepo"], - [false, {}, {}, "inactive-both@example.com", true, true, "inactiveboth"], + [false, empty_notify_list, empty_notify_list, "inactive-none@example.com", false, false, "inactivenone"], + [false, empty_notify_list, empty_notify_list, "inactive-vm@example.com", true, false, "inactivevm"], + [false, empty_notify_list, empty_notify_list, "inactive-repo@example.com", false, true, "inactiverepo"], + [false, empty_notify_list, empty_notify_list, "inactive-both@example.com", true, true, "inactiveboth"], # Easy active user tests. - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-none@example.com", false, false, "activenone"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-vm@example.com", true, false, "activevm"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-repo@example.com", false, true, "activerepo"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "active-both@example.com", true, true, "activeboth"], + [true, active_notify_list, inactive_notify_list, "active-none@example.com", false, false, "activenone"], + [true, active_notify_list, inactive_notify_list, "active-vm@example.com", true, false, "activevm"], + [true, active_notify_list, inactive_notify_list, "active-repo@example.com", false, true, "activerepo"], + [true, active_notify_list, inactive_notify_list, "active-both@example.com", true, true, "activeboth"], # Test users with malformed e-mail addresses. 
- [false, {}, {}, nil, true, true, nil], - [false, {}, {}, "arvados", true, true, nil], - [false, {}, {}, "@example.com", true, true, nil], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "*!*@example.com", true, false, nil], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "*!*@example.com", false, false, nil], + [false, empty_notify_list, empty_notify_list, nil, true, true, nil], + [false, empty_notify_list, empty_notify_list, "arvados", true, true, nil], + [false, empty_notify_list, empty_notify_list, "@example.com", true, true, nil], + [true, active_notify_list, inactive_notify_list, "*!*@example.com", true, false, nil], + [true, active_notify_list, inactive_notify_list, "*!*@example.com", false, false, nil], # Test users with various username transformations. - [false, {}, {}, "arvados@example.com", false, false, "arvados2"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "arvados@example.com", false, false, "arvados2"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "root@example.com", true, false, "root2"], - [false, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "root@example.com", true, false, "root2"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "roo_t@example.com", false, true, "root2"], - [false, {}, {}, "^^incorrect_format@example.com", true, true, "incorrectformat"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", true, true, "ad9"], - [true, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", false, false, "ad9"], - [false, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", true, true, "ad9"], - [false, {"active-notify@example.com"=>{}}, {"inactive-notify@example.com"=>{}}, "&4a_d9.@example.com", false, false, "ad9"], + [false, empty_notify_list, empty_notify_list, "arvados@example.com", false, false, "arvados2"], + [true, active_notify_list, inactive_notify_list, "arvados@example.com", false, false, "arvados2"], + [true, active_notify_list, inactive_notify_list, "root@example.com", true, false, "root2"], + [false, active_notify_list, inactive_notify_list, "root@example.com", true, false, "root2"], + [true, active_notify_list, inactive_notify_list, "roo_t@example.com", false, true, "root2"], + [false, empty_notify_list, empty_notify_list, "^^incorrect_format@example.com", true, true, "incorrectformat"], + [true, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", true, true, "ad9"], + [true, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", false, false, "ad9"], + [false, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", true, true, "ad9"], + [false, active_notify_list, inactive_notify_list, "&4a_d9.@example.com", false, false, "ad9"], ].each do |active, new_user_recipients, inactive_recipients, email, auto_setup_vm, auto_setup_repo, expect_username| test "create new user with auto setup #{active} #{email} #{auto_setup_vm} #{auto_setup_repo}" do set_user_from_auth :admin @@ -569,7 +573,6 @@ class UserTest < ActiveSupport::TestCase assert_not_nil resp_user, 'expected user object' assert_not_nil resp_user['uuid'], 'expected user object' assert_equal email, resp_user['email'], 'expected email not found' - end def verify_link (link_object, link_class, link_name, tail_uuid, head_uuid) @@ -648,7 +651,7 @@ 
class UserTest < ActiveSupport::TestCase if not new_user_recipients.empty? then assert_not_nil new_user_email, 'Expected new user email after setup' assert_equal Rails.configuration.Users.UserNotifierEmailFrom, new_user_email.from[0] - assert_equal new_user_recipients.keys.first, new_user_email.to[0] + assert_equal new_user_recipients.stringify_keys.keys.first, new_user_email.to[0] assert_equal new_user_email_subject, new_user_email.subject else assert_nil new_user_email, 'Did not expect new user email after setup' @@ -658,7 +661,7 @@ class UserTest < ActiveSupport::TestCase if not inactive_recipients.empty? then assert_not_nil new_inactive_user_email, 'Expected new inactive user email after setup' assert_equal Rails.configuration.Users.UserNotifierEmailFrom, new_inactive_user_email.from[0] - assert_equal inactive_recipients.keys.first, new_inactive_user_email.to[0] + assert_equal inactive_recipients.stringify_keys.keys.first, new_inactive_user_email.to[0] assert_equal "#{Rails.configuration.Users.EmailSubjectPrefix}New inactive user notification", new_inactive_user_email.subject else assert_nil new_inactive_user_email, 'Did not expect new inactive user email after setup' @@ -667,7 +670,6 @@ class UserTest < ActiveSupport::TestCase assert_nil new_inactive_user_email, 'Expected no inactive user email after setting up active user' end ActionMailer::Base.deliveries = [] - end def verify_link_exists link_exists, head_uuid, tail_uuid, link_class, link_name, property_name=nil, property_value=nil diff --git a/services/crunch-dispatch-slurm/squeue.go b/services/crunch-dispatch-slurm/squeue.go index 5aee7e087b..eae21e62b6 100644 --- a/services/crunch-dispatch-slurm/squeue.go +++ b/services/crunch-dispatch-slurm/squeue.go @@ -23,8 +23,8 @@ type slurmJob struct { hitNiceLimit bool } -// Squeue implements asynchronous polling monitor of the SLURM queue using the -// command 'squeue'. +// SqueueChecker implements asynchronous polling monitor of the SLURM queue +// using the command 'squeue'. type SqueueChecker struct { Logger logger Period time.Duration @@ -102,13 +102,12 @@ func (sqc *SqueueChecker) reniceAll() { sort.Slice(jobs, func(i, j int) bool { if jobs[i].wantPriority != jobs[j].wantPriority { return jobs[i].wantPriority > jobs[j].wantPriority - } else { - // break ties with container uuid -- - // otherwise, the ordering would change from - // one interval to the next, and we'd do many - // pointless slurm queue rearrangements. - return jobs[i].uuid > jobs[j].uuid } + // break ties with container uuid -- + // otherwise, the ordering would change from + // one interval to the next, and we'd do many + // pointless slurm queue rearrangements. + return jobs[i].uuid > jobs[j].uuid }) renice := wantNice(jobs, sqc.PrioritySpread) for i, job := range jobs { diff --git a/services/keepstore/pull_worker.go b/services/keepstore/pull_worker.go index b4ccd98282..670fa1a414 100644 --- a/services/keepstore/pull_worker.go +++ b/services/keepstore/pull_worker.go @@ -80,7 +80,7 @@ func (h *handler) pullItemAndProcess(pullRequest PullRequest) error { return writePulledBlock(h.volmgr, vol, readContent, pullRequest.Locator) } -// Fetch the content for the given locator using keepclient. +// GetContent fetches the content for the given locator using keepclient. 
var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (io.ReadCloser, int64, string, error) { return keepClient.Get(signedLocator) } @@ -88,8 +88,7 @@ var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) ( var writePulledBlock = func(volmgr *RRVolumeManager, volume Volume, data []byte, locator string) error { if volume != nil { return volume.Put(context.Background(), locator, data) - } else { - _, err := PutBlock(context.Background(), volmgr, data, locator) - return err } + _, err := PutBlock(context.Background(), volmgr, data, locator) + return err } diff --git a/services/keepstore/s3_volume.go b/services/keepstore/s3_volume.go index 235d369b5a..07bb033c9f 100644 --- a/services/keepstore/s3_volume.go +++ b/services/keepstore/s3_volume.go @@ -586,7 +586,10 @@ func (v *S3Volume) IndexTo(prefix string, writer io.Writer) error { if err != nil { return err } - fmt.Fprintf(writer, "%s+%d %d\n", data.Key, data.Size, t.UnixNano()) + // We truncate sub-second precision here. Otherwise + // timestamps will never match the RFC1123-formatted + // Last-Modified values parsed by Mtime(). + fmt.Fprintf(writer, "%s+%d %d\n", data.Key, data.Size, t.Unix()*1000000000) } return dataL.Error() } diff --git a/services/keepstore/s3aws_volume.go b/services/keepstore/s3aws_volume.go index c9fa7fce5e..8d999e7472 100644 --- a/services/keepstore/s3aws_volume.go +++ b/services/keepstore/s3aws_volume.go @@ -33,7 +33,7 @@ import ( "github.com/sirupsen/logrus" ) -// S3Volume implements Volume using an S3 bucket. +// S3AWSVolume implements Volume using an S3 bucket. type S3AWSVolume struct { arvados.S3VolumeDriverParameters AuthToken string // populated automatically when IAMRole is used @@ -69,10 +69,9 @@ func chooseS3VolumeDriver(cluster *arvados.Cluster, volume arvados.Volume, logge if v.UseAWSS3v2Driver { logger.Debugln("Using AWS S3 v2 driver") return newS3AWSVolume(cluster, volume, logger, metrics) - } else { - logger.Debugln("Using goamz S3 driver") - return newS3Volume(cluster, volume, logger, metrics) } + logger.Debugln("Using goamz S3 driver") + return newS3Volume(cluster, volume, logger, metrics) } const ( @@ -728,7 +727,10 @@ func (v *S3AWSVolume) IndexTo(prefix string, writer io.Writer) error { if err := recentL.Error(); err != nil { return err } - fmt.Fprintf(writer, "%s+%d %d\n", *data.Key, *data.Size, stamp.LastModified.UnixNano()) + // We truncate sub-second precision here. Otherwise + // timestamps will never match the RFC1123-formatted + // Last-Modified values parsed by Mtime(). 
+ fmt.Fprintf(writer, "%s+%d %d\n", *data.Key, *data.Size, stamp.LastModified.Unix()*1000000000) } return dataL.Error() } diff --git a/tools/arvbox/bin/arvbox b/tools/arvbox/bin/arvbox index 279d46c08b..122e2bec7c 100755 --- a/tools/arvbox/bin/arvbox +++ b/tools/arvbox/bin/arvbox @@ -107,11 +107,11 @@ wait_for_arvbox() { while read line ; do if [[ $line =~ "ok: down: ready:" ]] ; then kill $LOGPID - set +e - wait $LOGPID 2>/dev/null - set -e - else - echo $line + set +e + wait $LOGPID 2>/dev/null + set -e + else + echo $line fi done < $FF rm $FF @@ -125,7 +125,7 @@ wait_for_arvbox() { docker_run_dev() { docker run \ - "--volume=$ARVADOS_ROOT:/usr/src/arvados:rw" \ + "--volume=$ARVADOS_ROOT:/usr/src/arvados:rw" \ "--volume=$COMPOSER_ROOT:/usr/src/composer:rw" \ "--volume=$WORKBENCH2_ROOT:/usr/src/workbench2:rw" \ "--volume=$PG_DATA:/var/lib/postgresql:rw" \ @@ -136,8 +136,8 @@ docker_run_dev() { "--volume=$NPMCACHE:/var/lib/npm:rw" \ "--volume=$GOSTUFF:/var/lib/gopath:rw" \ "--volume=$RLIBS:/var/lib/Rlibs:rw" \ - --label "org.arvados.arvbox_config=$CONFIG" \ - "$@" + --label "org.arvados.arvbox_config=$CONFIG" \ + "$@" } running_config() { @@ -153,10 +153,10 @@ run() { need_setup=1 if docker ps -a --filter "status=running" | grep -E "$ARVBOX_CONTAINER$" -q ; then - if [[ $(running_config) != "$CONFIG" ]] ; then - echo "Container $ARVBOX_CONTAINER is '$(running_config)' config but requested '$CONFIG'; use restart or reboot" - return 1 - fi + if [[ $(running_config) != "$CONFIG" ]] ; then + echo "Container $ARVBOX_CONTAINER is '$(running_config)' config but requested '$CONFIG'; use restart or reboot" + return 1 + fi if test "$CONFIG" = test -o "$CONFIG" = devenv ; then need_setup=0 else @@ -175,12 +175,12 @@ run() { if test -n "$TAG" then if test $(echo $TAG | cut -c1-1) != '-' ; then - TAG=":$TAG" + TAG=":$TAG" shift else - if [[ $TAG = '-' ]] ; then - shift - fi + if [[ $TAG = '-' ]] ; then + shift + fi unset TAG fi fi @@ -192,7 +192,7 @@ run() { defaultdev=$(/sbin/ip route|awk '/default/ { print $5 }') localip=$(ip addr show $defaultdev | grep 'inet ' | sed 's/ *inet \(.*\)\/.*/\1/') fi - echo "Public arvbox will use address $localip" + echo "Public arvbox will use address $localip" iptemp=$(mktemp) echo $localip > $iptemp chmod og+r $iptemp @@ -207,7 +207,7 @@ run() { --publish=8001:8001 --publish=8002:8002 --publish=4202:4202 - --publish=45000-45020:45000-45020" + --publish=45000-45020:45000-45020" else PUBLIC="" fi @@ -228,7 +228,7 @@ run() { --name=$ARVBOX_CONTAINER \ --privileged \ --volumes-from $ARVBOX_CONTAINER-data \ - --label "org.arvados.arvbox_config=$CONFIG" \ + --label "org.arvados.arvbox_config=$CONFIG" \ $PUBLIC \ arvados/arvbox-demo$TAG updateconf @@ -257,7 +257,7 @@ run() { --detach \ --name=$ARVBOX_CONTAINER \ --privileged \ - "--env=SVDIR=/etc/test-service" \ + "--env=SVDIR=/etc/test-service" \ arvados/arvbox-dev$TAG docker exec -ti \ @@ -271,10 +271,10 @@ run() { /var/lib/arvbox/service/api/run-service --only-setup fi - interactive="" - if [[ -z "$@" ]] ; then - interactive=--interactive - fi + interactive="" + if [[ -z "$@" ]] ; then + interactive=--interactive + fi docker exec -ti \ -e LINES=$(tput lines) \ @@ -282,32 +282,32 @@ run() { -e TERM=$TERM \ -e WORKSPACE=/usr/src/arvados \ -e GEM_HOME=/var/lib/gems \ - -e CONFIGSRC=/var/lib/arvados/run_tests \ + -e CONFIGSRC=/var/lib/arvados/run_tests \ $ARVBOX_CONTAINER \ /usr/local/lib/arvbox/runsu.sh \ /usr/src/arvados/build/run-tests.sh \ --temp /var/lib/arvados/test \ - $interactive \ + $interactive \ "$@" elif [[ 
"$CONFIG" = devenv ]] ; then - if [[ $need_setup = 1 ]] ; then - docker_run_dev \ + if [[ $need_setup = 1 ]] ; then + docker_run_dev \ --detach \ - --name=${ARVBOX_CONTAINER} \ - "--env=SVDIR=/etc/devenv-service" \ - "--volume=$HOME:$HOME:rw" \ - --volume=/tmp/.X11-unix:/tmp/.X11-unix:rw \ - arvados/arvbox-dev$TAG - fi - exec docker exec --interactive --tty \ - -e LINES=$(tput lines) \ - -e COLUMNS=$(tput cols) \ - -e TERM=$TERM \ - -e "ARVBOX_HOME=$HOME" \ - -e "DISPLAY=$DISPLAY" \ - --workdir=$PWD \ - ${ARVBOX_CONTAINER} \ - /usr/local/lib/arvbox/devenv.sh "$@" + --name=${ARVBOX_CONTAINER} \ + "--env=SVDIR=/etc/devenv-service" \ + "--volume=$HOME:$HOME:rw" \ + --volume=/tmp/.X11-unix:/tmp/.X11-unix:rw \ + arvados/arvbox-dev$TAG + fi + exec docker exec --interactive --tty \ + -e LINES=$(tput lines) \ + -e COLUMNS=$(tput cols) \ + -e TERM=$TERM \ + -e "ARVBOX_HOME=$HOME" \ + -e "DISPLAY=$DISPLAY" \ + --workdir=$PWD \ + ${ARVBOX_CONTAINER} \ + /usr/local/lib/arvbox/devenv.sh "$@" elif [[ "$CONFIG" =~ dev$ ]] ; then docker_run_dev \ --detach \ @@ -318,12 +318,12 @@ run() { updateconf wait_for_arvbox echo "The Arvados source code is checked out at: $ARVADOS_ROOT" - echo "The Arvados testing root certificate is $VAR_DATA/root-cert.pem" - if [[ "$(listusers)" =~ ^\{\} ]] ; then - echo "No users defined, use 'arvbox adduser' to add user logins" - else - echo "Use 'arvbox listusers' to see user logins" - fi + echo "The Arvados testing root certificate is $VAR_DATA/root-cert.pem" + if [[ "$(listusers)" =~ ^\{\} ]] ; then + echo "No users defined, use 'arvbox adduser' to add user logins" + else + echo "Use 'arvbox listusers' to see user logins" + fi else echo "Unknown configuration '$CONFIG'" fi @@ -337,7 +337,7 @@ update() { if test -n "$TAG" then if test $(echo $TAG | cut -c1-1) != '-' ; then - TAG=":$TAG" + TAG=":$TAG" shift else unset TAG @@ -345,9 +345,9 @@ update() { fi if echo "$CONFIG" | grep 'demo$' ; then - docker pull arvados/arvbox-demo$TAG + docker pull arvados/arvbox-demo$TAG else - docker pull arvados/arvbox-dev$TAG + docker pull arvados/arvbox-dev$TAG fi } @@ -421,22 +421,22 @@ case "$subcmd" in sh*) exec docker exec --interactive --tty \ - -e LINES=$(tput lines) \ - -e COLUMNS=$(tput cols) \ - -e TERM=$TERM \ - -e GEM_HOME=/var/lib/gems \ - $ARVBOX_CONTAINER /bin/bash + -e LINES=$(tput lines) \ + -e COLUMNS=$(tput cols) \ + -e TERM=$TERM \ + -e GEM_HOME=/var/lib/gems \ + $ARVBOX_CONTAINER /bin/bash ;; ash*) exec docker exec --interactive --tty \ - -e LINES=$(tput lines) \ - -e COLUMNS=$(tput cols) \ - -e TERM=$TERM \ - -e GEM_HOME=/var/lib/gems \ - -u arvbox \ - -w /usr/src/arvados \ - $ARVBOX_CONTAINER /bin/bash --login + -e LINES=$(tput lines) \ + -e COLUMNS=$(tput cols) \ + -e TERM=$TERM \ + -e GEM_HOME=/var/lib/gems \ + -u arvbox \ + -w /usr/src/arvados \ + $ARVBOX_CONTAINER /bin/bash --login ;; pipe) @@ -463,7 +463,7 @@ case "$subcmd" in update) check $@ stop - update $@ + update $@ run $@ ;; @@ -482,7 +482,7 @@ case "$subcmd" in status) echo "Container: $ARVBOX_CONTAINER" if docker ps -a --filter "status=running" | grep -E "$ARVBOX_CONTAINER$" -q ; then - echo "Cluster id: $(getclusterid)" + echo "Cluster id: $(getclusterid)" echo "Status: running" echo "Container IP: $(getip)" echo "Published host: $(gethost)" @@ -563,17 +563,17 @@ case "$subcmd" in clone) if test -n "$2" ; then - mkdir -p "$ARVBOX_BASE/$2" + mkdir -p "$ARVBOX_BASE/$2" cp -a "$ARVBOX_BASE/$1/passenger" \ - "$ARVBOX_BASE/$1/gems" \ - "$ARVBOX_BASE/$1/pip" \ - "$ARVBOX_BASE/$1/npm" \ - "$ARVBOX_BASE/$1/gopath" 
\ - "$ARVBOX_BASE/$1/Rlibs" \ - "$ARVBOX_BASE/$1/arvados" \ - "$ARVBOX_BASE/$1/composer" \ - "$ARVBOX_BASE/$1/workbench2" \ - "$ARVBOX_BASE/$2" + "$ARVBOX_BASE/$1/gems" \ + "$ARVBOX_BASE/$1/pip" \ + "$ARVBOX_BASE/$1/npm" \ + "$ARVBOX_BASE/$1/gopath" \ + "$ARVBOX_BASE/$1/Rlibs" \ + "$ARVBOX_BASE/$1/arvados" \ + "$ARVBOX_BASE/$1/composer" \ + "$ARVBOX_BASE/$1/workbench2" \ + "$ARVBOX_BASE/$2" echo "Created new arvbox $2" echo "export ARVBOX_CONTAINER=$2" else @@ -583,28 +583,28 @@ case "$subcmd" in ;; root-cert) - CERT=$PWD/${ARVBOX_CONTAINER}-root-cert.crt - if test -n "$1" ; then - CERT="$1" - fi - docker exec $ARVBOX_CONTAINER cat /var/lib/arvados/root-cert.pem > "$CERT" - echo "Certificate copied to $CERT" - ;; + CERT=$PWD/${ARVBOX_CONTAINER}-root-cert.crt + if test -n "$1" ; then + CERT="$1" + fi + docker exec $ARVBOX_CONTAINER cat /var/lib/arvados/root-cert.pem > "$CERT" + echo "Certificate copied to $CERT" + ;; psql) - exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados' - ;; + exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados' + ;; checkpoint) - exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec pg_dump --host=localhost --username=arvados --clean arvados_development > /var/lib/arvados/checkpoint.sql' - ;; + exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec pg_dump --host=localhost --username=arvados --clean arvados_development > /var/lib/arvados/checkpoint.sql' + ;; restore) - exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados --quiet --file=/var/lib/arvados/checkpoint.sql' - ;; + exec docker exec -ti $ARVBOX_CONTAINER bash -c 'PGPASSWORD=$(cat /var/lib/arvados/api_database_pw) exec psql --dbname=arvados_development --host=localhost --username=arvados --quiet --file=/var/lib/arvados/checkpoint.sql' + ;; hotreset) - exec docker exec -i $ARVBOX_CONTAINER /usr/bin/env GEM_HOME=/var/lib/gems /bin/bash - < build arvbox Docker image" echo "reboot stop, build arvbox Docker image, run" echo "rebuild build arvbox Docker image, no layer cache" - echo "checkpoint create database backup" - echo "restore restore checkpoint" - echo "hotreset reset database and restart API without restarting container" + echo "checkpoint create database backup" + echo "restore restore checkpoint" + echo "hotreset reset database and restart API without restarting container" echo "reset delete arvbox arvados data (be careful!)" echo "destroy delete all arvbox code and data (be careful!)" echo "log tail log of specified service" @@ -671,12 +671,12 @@ EOF echo "cat get contents of files inside arvbox" echo "pipe run a bash script piped in from stdin" echo "sv " - echo " change state of service inside arvbox" + echo " change state of service inside arvbox" echo "clone clone dev arvbox" - echo "adduser " - echo " add a user login" - echo "removeuser " - echo " remove user login" - echo "listusers list user logins" + echo "adduser " + echo " add a user login" + echo "removeuser " + echo " remove user login" + echo "listusers list user logins" ;; esac diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.base 
b/tools/arvbox/lib/arvbox/docker/Dockerfile.base index bde5ffe898..eb52ca5a78 100644 --- a/tools/arvbox/lib/arvbox/docker/Dockerfile.base +++ b/tools/arvbox/lib/arvbox/docker/Dockerfile.base @@ -2,30 +2,30 @@ # # SPDX-License-Identifier: AGPL-3.0 -FROM debian:9 +FROM debian:10 ENV DEBIAN_FRONTEND noninteractive RUN apt-get update && \ apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \ - postgresql-9.6 postgresql-contrib-9.6 git build-essential runit curl libpq-dev \ - libcurl4-openssl-dev libssl1.0-dev zlib1g-dev libpcre3-dev libpam-dev \ - openssh-server python-setuptools netcat-traditional \ - python-epydoc graphviz bzip2 less sudo virtualenv \ - libpython-dev fuse libfuse-dev python-pip python-yaml \ - pkg-config libattr1-dev python-pycurl \ + postgresql postgresql-contrib git build-essential runit curl libpq-dev \ + libcurl4-openssl-dev libssl-dev zlib1g-dev libpcre3-dev libpam-dev \ + openssh-server netcat-traditional \ + graphviz bzip2 less sudo virtualenv \ + fuse libfuse-dev \ + pkg-config libattr1-dev \ libwww-perl libio-socket-ssl-perl libcrypt-ssleay-perl \ libjson-perl nginx gitolite3 lsof libreadline-dev \ - apt-transport-https ca-certificates \ - linkchecker python3-virtualenv python-virtualenv xvfb iceweasel \ + apt-transport-https ca-certificates python3-yaml \ + linkchecker python3-virtualenv python3-venv xvfb iceweasel \ libgnutls28-dev python3-dev vim cadaver cython gnupg dirmngr \ libsecret-1-dev r-base r-cran-testthat libxml2-dev pandoc \ - python3-setuptools python3-pip openjdk-8-jdk bsdmainutils net-tools \ - ruby2.3 ruby-dev bundler shellinabox && \ + python3-setuptools python3-pip default-jdk-headless bsdmainutils net-tools \ + ruby ruby-dev bundler shellinabox && \ apt-get clean -ENV RUBYVERSION_MINOR 2.3 -ENV RUBYVERSION 2.3.5 +ENV RUBYVERSION_MINOR 2.5 +ENV RUBYVERSION 2.5.1 # Install Ruby from source # RUN cd /tmp && \ @@ -38,10 +38,9 @@ ENV RUBYVERSION 2.3.5 # rm -rf ruby-${RUBYVERSION} ENV GEM_HOME /var/lib/gems -ENV GEM_PATH /var/lib/gems ENV PATH $PATH:/var/lib/gems/bin -ENV GOVERSION 1.13.6 +ENV GOVERSION 1.15.2 # Install golang binary RUN curl -f http://storage.googleapis.com/golang/go${GOVERSION}.linux-amd64.tar.gz | \ @@ -58,9 +57,9 @@ RUN apt-key add --no-tty /tmp/8D81803C0EBFCD88.asc && \ rm -f /tmp/8D81803C0EBFCD88.asc RUN mkdir -p /etc/apt/sources.list.d && \ - echo deb https://download.docker.com/linux/debian/ stretch stable > /etc/apt/sources.list.d/docker.list && \ + echo deb https://download.docker.com/linux/debian/ buster stable > /etc/apt/sources.list.d/docker.list && \ apt-get update && \ - apt-get -yq --no-install-recommends install docker-ce=17.06.0~ce-0~debian && \ + apt-get -yq --no-install-recommends install docker-ce=5:19.03.13~3-0~debian-buster && \ apt-get clean RUN rm -rf /var/lib/postgresql && mkdir -p /var/lib/postgresql @@ -78,8 +77,6 @@ ENV GDVERSION=v0.23.0 ENV GDURL=https://github.com/mozilla/geckodriver/releases/download/$GDVERSION/geckodriver-$GDVERSION-linux64.tar.gz RUN set -e && curl -L -f ${GDURL} | tar -C /usr/local/bin -xzf - geckodriver -RUN pip install -U setuptools - ENV NODEVERSION v8.15.1 # Install nodejs binary @@ -117,4 +114,4 @@ ADD runit /etc/runit # Start the supervisor. 
ENV SVDIR /etc/service STOPSIGNAL SIGINT -CMD ["/sbin/runit"] +CMD ["/etc/runit/2"] diff --git a/tools/arvbox/lib/arvbox/docker/common.sh b/tools/arvbox/lib/arvbox/docker/common.sh index e81e8108e2..185467cf7d 100644 --- a/tools/arvbox/lib/arvbox/docker/common.sh +++ b/tools/arvbox/lib/arvbox/docker/common.sh @@ -2,10 +2,9 @@ # # SPDX-License-Identifier: AGPL-3.0 - +export DEBIAN_FRONTEND=noninteractive export PATH=${PATH}:/usr/local/go/bin:/var/lib/gems/bin export GEM_HOME=/var/lib/gems -export GEM_PATH=/var/lib/gems export npm_config_cache=/var/lib/npm export npm_config_cache_min=Infinity export R_LIBS=/var/lib/Rlibs @@ -60,6 +59,10 @@ fi run_bundler() { if test -f Gemfile.lock ; then + # The 'gem install bundler line below' is cf. + # https://bundler.io/blog/2019/05/14/solutions-for-cant-find-gem-bundler-with-executable-bundle.html, + # until we get bundler 2.7.10/3.0.0 or higher + gem install bundler --no-document -v "$(grep -A 1 "BUNDLED WITH" Gemfile.lock | tail -n 1|tr -d ' ')" frozen=--frozen else frozen="" @@ -73,8 +76,8 @@ run_bundler() { # flock /var/lib/gems/gems.lock gem install --verbose --no-document bundler --version ${bundleversion} # fi # fi - if ! flock /var/lib/gems/gems.lock bundler install --verbose --path $GEM_HOME --local --no-deployment $frozen "$@" ; then - flock /var/lib/gems/gems.lock bundler install --verbose --path $GEM_HOME --no-deployment $frozen "$@" + if ! flock /var/lib/gems/gems.lock bundler install --verbose --local --no-deployment $frozen "$@" ; then + flock /var/lib/gems/gems.lock bundler install --verbose --no-deployment $frozen "$@" fi } diff --git a/tools/arvbox/lib/arvbox/docker/createusers.sh b/tools/arvbox/lib/arvbox/docker/createusers.sh index 58fb413582..de1e7bba96 100755 --- a/tools/arvbox/lib/arvbox/docker/createusers.sh +++ b/tools/arvbox/lib/arvbox/docker/createusers.sh @@ -46,7 +46,6 @@ if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then cat < /etc/profile.d/paths.sh export PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/go/bin:/var/lib/gems/bin:$(ls -d /usr/local/node-*)/bin export GEM_HOME=/var/lib/gems -export GEM_PATH=/var/lib/gems export npm_config_cache=/var/lib/npm export npm_config_cache_min=Infinity export R_LIBS=/var/lib/Rlibs diff --git a/tools/arvbox/lib/arvbox/docker/service/postgres/run-service b/tools/arvbox/lib/arvbox/docker/service/postgres/run-service index a0771aa6a0..f2377a0c2d 100755 --- a/tools/arvbox/lib/arvbox/docker/service/postgres/run-service +++ b/tools/arvbox/lib/arvbox/docker/service/postgres/run-service @@ -6,7 +6,7 @@ exec 2>&1 set -eux -o pipefail -PGVERSION=9.6 +PGVERSION=11 if ! test -d /var/lib/postgresql/$PGVERSION/main ; then /usr/lib/postgresql/$PGVERSION/bin/initdb --locale=en_US.UTF-8 -D /var/lib/postgresql/$PGVERSION/main diff --git a/tools/arvbox/lib/arvbox/docker/service/sdk/run-service b/tools/arvbox/lib/arvbox/docker/service/sdk/run-service index 8a36140bcf..d66bf315b1 100755 --- a/tools/arvbox/lib/arvbox/docker/service/sdk/run-service +++ b/tools/arvbox/lib/arvbox/docker/service/sdk/run-service @@ -20,30 +20,16 @@ ln -sf /usr/src/arvados/sdk/cli/binstubs/arv /usr/local/bin/arv export PYCMD=python3 -# Need to install the upstream version of pip because the python-pip package -# shipped with Debian 9 is patched to change behavior in a way that breaks our -# use case. 
-# See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=876145
-# When a non-root user attempts to install system packages, it makes the
-# --ignore-installed flag the default (and there is no way to turn it off),
-# this has the effect of making it very hard to share dependencies shared among
-# multiple packages, because it will blindly install the latest version of each
-# dependency requested by each package, even if a compatible package version is
-# already installed.
-if ! pip3 install --no-index --find-links /var/lib/pip pip==9.0.3 ; then
-    pip3 install pip==9.0.3
-fi
-
 pip_install wheel
 
 cd /usr/src/arvados/sdk/python
-python setup.py sdist
+$PYCMD setup.py sdist
 pip_install $(ls dist/arvados-python-client-*.tar.gz | tail -n1)
 
 cd /usr/src/arvados/services/fuse
-python setup.py sdist
+$PYCMD setup.py sdist
 pip_install $(ls dist/arvados_fuse-*.tar.gz | tail -n1)
 
 cd /usr/src/arvados/sdk/cwl
-python setup.py sdist
+$PYCMD setup.py sdist
 pip_install $(ls dist/arvados-cwl-runner-*.tar.gz | tail -n1)
diff --git a/tools/arvbox/lib/arvbox/docker/yml_override.py b/tools/arvbox/lib/arvbox/docker/yml_override.py
index 446448f5eb..7f35ac1d68 100755
--- a/tools/arvbox/lib/arvbox/docker/yml_override.py
+++ b/tools/arvbox/lib/arvbox/docker/yml_override.py
@@ -20,7 +20,7 @@ with open(fn) as f:
 def recursiveMerge(a, b):
     if isinstance(a, dict) and isinstance(b, dict):
         for k in b:
-            print k
+            print(k)
             a[k] = recursiveMerge(a.get(k), b[k])
         return a
     else:
diff --git a/tools/copy-tutorial/copy-tutorial.sh b/tools/copy-tutorial/copy-tutorial.sh
index bdc75da2e1..e7fac7af48 100755
--- a/tools/copy-tutorial/copy-tutorial.sh
+++ b/tools/copy-tutorial/copy-tutorial.sh
@@ -1,25 +1,83 @@
-#!/bin/sh
+#!/bin/bash
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-set -e
+set -e -o pipefail
 
-if test -z "$1" ; then
+if test -z "$1" ; then
     echo "$0: Copies Arvados tutorial resources from public data cluster (jutro)"
-    echo "Usage: copy-tutorial.sh <dest>"
-    echo "<dest> is destination cluster configuration that can be found in ~/.config/arvados"
+    echo "Usage: copy-tutorial.sh <tutorial>"
+    echo "<tutorial> is which tutorial to copy, one of:"
+    echo "  bwa-mem        Tutorial from https://doc.arvados.org/user/tutorials/tutorial-workflow-workbench.html"
+    echo "  whole-genome   Whole genome variant calling tutorial workflow (large)"
     exit
 fi
 
+if test -z "$ARVADOS_API_HOST" ; then
+    echo "Please set ARVADOS_API_HOST to the destination cluster"
+    exit
+fi
+
+src=jutro
+tutorial=$1
+
+if ! test -f $HOME/.config/arvados/jutro.conf ; then
+    # Set it up with the anonymous user token.
+    echo "ARVADOS_API_HOST=jutro.arvadosapi.com" > $HOME/.config/arvados/jutro.conf
+    echo "ARVADOS_API_TOKEN=v2/jutro-gj3su-e2o9x84aeg7q005/22idg1m3zna4qe4id3n0b9aw86t72jdw8qu1zj45aboh1mm4ej" >> $HOME/.config/arvados/jutro.conf
+    exit 1
+fi
+
+echo
+echo "Copying from public data cluster (jutro) to $ARVADOS_API_HOST"
+echo
+
+make_project() {
+    name="$1"
+    owner="$2"
+    if test -z "$owner" ; then
+        owner=$(arv --format=uuid user current)
+    fi
+    project_uuid=$(arv --format=uuid group list --filters '[["name", "=", "'"$name"'"], ["owner_uuid", "=", "'$owner'"]]')
+    if test -z "$project_uuid" ; then
+        project_uuid=$(arv --format=uuid group create --group '{"name":"'"$name"'", "group_class": "project", "owner_uuid": "'$owner'"}')
+
+    fi
+    echo $project_uuid
+}
-for a in $(cat $HOME/.config/arvados/$1.conf) ; do export $a ; done
+copy_jobs_image() {
+    if ! 
arv-keepdocker | grep "arvados/jobs *latest" ; then + arv-copy --project-uuid=$parent_project jutro-4zz18-sxmit0qs6i9n2s4 + fi +} -project_uuid=$(arv --format=uuid group create --group '{"name":"User guide resources", "group_class": "project"}') +parent_project=$(make_project "Tutorial projects") +copy_jobs_image -# Bwa-mem workflow -arv-copy --src jutro --dst $1 --project-uuid=$project_uuid f141fc27e7cfa7f7b6d208df5e0ee01b+59 -arv-copy --src jutro --dst $1 --project-uuid=$project_uuid jutro-7fd4e-mkmmq53m1ze6apx +if test "$tutorial" = "bwa-mem" ; then + echo + echo "Copying bwa mem tutorial" + echo -echo "Data copied to \"User guide resources\" at $project_uuid" + arv-copy --project-uuid=$parent_project jutro-j7d0g-rehmt1w5v2p2drp + + echo + echo "Finished, data copied to \"User guide resources\" at $parent_project" + echo "You can now go to Workbench and choose 'Run a process' and then select 'bwa-mem.cwl'" + echo +fi + +if test "$tutorial" = "whole-genome" ; then + echo + echo "Copying whole genome variant calling tutorial" + echo + + arv-copy --project-uuid=$parent_project jutro-j7d0g-n2g87m02rsl4cx2 + + echo + echo "Finished, data copied to \"WGS Processing Tutorial\" at $parent_project" + echo "You can now go to Workbench and choose 'Run a process' and then select 'WGS Processing Tutorial'" + echo +fi
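
Taken together, the copy-tutorial.sh rewrite changes the script's interface: instead of naming a destination cluster configuration, the caller selects a tutorial (bwa-mem or whole-genome) and the destination is read from the ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables, e.g. "copy-tutorial.sh bwa-mem". On first use the script seeds ~/.config/arvados/jutro.conf with the public anonymous token for the source cluster. Because make_project looks a project up by name before creating it, re-running the script reuses the existing "Tutorial projects" parent rather than creating duplicates, and copy_jobs_image likewise copies the arvados/jobs Docker image only if arv-keepdocker does not already list it.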