<li role="menuitem"><a href="/groups">
<i class="fa fa-lg fa-users fa-fw"></i> Groups
</a></li>
- <li role="menuitem"><a href="/nodes">
- <i class="fa fa-lg fa-cloud fa-fw"></i> Compute nodes
- </a></li>
<li role="menuitem"><a href="/keep_services">
<i class="fa fa-lg fa-exchange fa-fw"></i> Keep services
</a></li>
- <li role="menuitem"><a href="/keep_disks">
- <i class="fa fa-lg fa-hdd-o fa-fw"></i> Keep disks
- </a></li>
</ul>
</li>
<% end %>
['SSH keys', nil, 'public_key'],
['Links', nil, 'link_class'],
['Groups', nil, 'All users'],
- ['Compute nodes', nil, 'ping_secret'],
['Keep services', nil, 'service_ssl_flag'],
- ['Keep disks', nil, 'bytes_free'],
].each do |page_name, add_button_text, look_for|
test "test system menu #{page_name} link" do
visit page_with_token('admin')
#
# SPDX-License-Identifier: AGPL-3.0
-all: centos7/generated debian10/generated ubuntu1604/generated ubuntu1804/generated ubuntu2004/generated
+all: centos7/generated debian10/generated debian11/generated ubuntu1804/generated ubuntu2004/generated
centos7/generated: common-generated-all
test -d centos7/generated || mkdir centos7/generated
test -d debian10/generated || mkdir debian10/generated
cp -f -rlt debian10/generated common-generated/*
-ubuntu1604/generated: common-generated-all
- test -d ubuntu1604/generated || mkdir ubuntu1604/generated
- cp -f -rlt ubuntu1604/generated common-generated/*
+debian11/generated: common-generated-all
+ test -d debian11/generated || mkdir debian11/generated
+ cp -f -rlt debian11/generated common-generated/*
ubuntu1804/generated: common-generated-all
test -d ubuntu1804/generated || mkdir ubuntu1804/generated
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2 && \
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19 && \
/usr/local/rvm/bin/rvm-exec default gem install fpm --version 1.10.2
# Install Bash 4.4.12 // see https://dev.arvados.org/issues/15612
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2 && \
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19 && \
/usr/local/rvm/bin/rvm-exec default gem install fpm --version 1.10.2
# Install golang binary
#
# SPDX-License-Identifier: AGPL-3.0
-FROM ubuntu:xenial
+## don't use debian:11 here since the word 'bullseye' is used for rvm precompiled binaries
+FROM debian:bullseye
MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
ENV DEBIAN_FRONTEND noninteractive
# Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev libgnutls-dev curl git libattr1-dev libfuse-dev libpq-dev unzip tzdata python3-venv python3-dev libpam-dev
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev unzip python3-venv python3-dev libpam-dev equivs
# Install virtualenv
RUN /usr/bin/pip3 install 'virtualenv<20'
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2 && \
+ echo "gem: --no-document" >> /etc/gemrc && \
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19 && \
/usr/local/rvm/bin/rvm-exec default gem install fpm --version 1.10.2
# Install golang binary
RUN git clone --depth 1 git://git.arvados.org/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
ENV WORKSPACE /arvados
-CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "ubuntu1604"]
+CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "debian11"]
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2 && \
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19 && \
/usr/local/rvm/bin/rvm-exec default gem install fpm --version 1.10.2
# Install golang binary
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2 && \
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19 && \
/usr/local/rvm/bin/rvm-exec default gem install fpm --version 1.10.2
# Install golang binary
#
# SPDX-License-Identifier: AGPL-3.0
-all: centos7/generated debian10/generated ubuntu1604/generated ubuntu1804/generated ubuntu2004/generated
+all: centos7/generated debian10/generated debian11/generated ubuntu1804/generated ubuntu2004/generated
centos7/generated: common-generated-all
test -d centos7/generated || mkdir centos7/generated
test -d debian10/generated || mkdir debian10/generated
cp -f -rlt debian10/generated common-generated/*
-ubuntu1604/generated: common-generated-all
- test -d ubuntu1604/generated || mkdir ubuntu1604/generated
- cp -f -rlt ubuntu1604/generated common-generated/*
+debian11/generated: common-generated-all
+ test -d debian11/generated || mkdir debian11/generated
+ cp -f -rlt debian11/generated common-generated/*
ubuntu1804/generated: common-generated-all
test -d ubuntu1804/generated || mkdir ubuntu1804/generated
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19
# Install Bash 4.4.12 // see https://dev.arvados.org/issues/15612
RUN cd /usr/local/src \
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
#
# SPDX-License-Identifier: AGPL-3.0
-FROM ubuntu:xenial
+FROM debian:bullseye
MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
ENV DEBIAN_FRONTEND noninteractive
# Install dependencies
RUN apt-get update && \
- apt-get -y install --no-install-recommends curl ca-certificates
+ apt-get -y install --no-install-recommends curl ca-certificates gpg procps gpg-agent
# Install RVM
ADD generated/mpapis.asc /tmp/
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2
+ echo "gem: --no-document" >> /etc/gemrc && \
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
-RUN echo "deb file:///arvados/packages/ubuntu1604/ /" >>/etc/apt/sources.list
-
-# Add preferences file for the Arvados packages. This pins Arvados
-# packages at priority 501, so that older python dependency versions
-# are preferred in those cases where we need them
-ADD etc-apt-preferences.d-arvados /etc/apt/preferences.d/arvados
+RUN echo "deb file:///arvados/packages/debian11/ /" >>/etc/apt/sources.list
+++ /dev/null
-Package: *
-Pin: release o=Arvados
-Pin-Priority: 501
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.0.2
+ /usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
+++ /dev/null
-deb-common-test-packages.sh
\ No newline at end of file
+++ /dev/null
-deb-common-test-packages.sh
\ No newline at end of file
+++ /dev/null
-deb-common-test-packages.sh
\ No newline at end of file
# initialize git_internal_dir
# usually /var/lib/arvados/internal.git (set in application.default.yml )
if [ "$APPLICATION_READY" = "1" ]; then
- GIT_INTERNAL_DIR=$($COMMAND_PREFIX bundle exec rake config:dump 2>&1 | grep GitInternalDir | awk '{ print $2 }' |tr -d '"')
+ GIT_INTERNAL_DIR=$($COMMAND_PREFIX bin/rake config:dump 2>&1 | grep GitInternalDir | awk '{ print $2 }' |tr -d '"')
if [ ! -e "$GIT_INTERNAL_DIR" ]; then
run_and_report "Creating git_internal_dir '$GIT_INTERNAL_DIR'" \
mkdir -p "$GIT_INTERNAL_DIR"
}
prepare_database() {
- DB_MIGRATE_STATUS=`$COMMAND_PREFIX bundle exec rake db:migrate:status 2>&1 || true`
+ DB_MIGRATE_STATUS=`$COMMAND_PREFIX bin/rake db:migrate:status 2>&1 || true`
if echo "$DB_MIGRATE_STATUS" | grep -qF 'Schema migrations table does not exist yet.'; then
# The database exists, but the migrations table doesn't.
- run_and_report "Setting up database" $COMMAND_PREFIX bundle exec \
- rake "$RAILSPKG_DATABASE_LOAD_TASK" db:seed
+ run_and_report "Setting up database" $COMMAND_PREFIX bin/rake \
+ "$RAILSPKG_DATABASE_LOAD_TASK" db:seed
elif echo "$DB_MIGRATE_STATUS" | grep -q '^database: '; then
run_and_report "Running db:migrate" \
- $COMMAND_PREFIX bundle exec rake db:migrate
+ $COMMAND_PREFIX bin/rake db:migrate
elif echo "$DB_MIGRATE_STATUS" | grep -q 'database .* does not exist'; then
if ! run_and_report "Running db:setup" \
- $COMMAND_PREFIX bundle exec rake db:setup 2>/dev/null; then
+ $COMMAND_PREFIX bin/rake db:setup 2>/dev/null; then
echo "Warning: unable to set up database." >&2
DATABASE_READY=0
fi
cd "$RELEASE_PATH"
export RAILS_ENV=production
- if ! $COMMAND_PREFIX bundle --version >/dev/null; then
- run_and_report "Installing bundler" $COMMAND_PREFIX gem install bundler --version 1.17.3
+ if ! $COMMAND_PREFIX bundle --version >/dev/null 2>&1; then
+ run_and_report "Installing bundler" $COMMAND_PREFIX gem install bundler --version 2.2.19 --no-document
fi
+ run_and_report "Running bundle config set --local path $SHARED_PATH/vendor_bundle" \
+ $COMMAND_PREFIX bin/bundle config set --local path $SHARED_PATH/vendor_bundle
+
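+# (--local tells bundler to install from the gems vendored/cached locally with the package
+# instead of fetching them from rubygems.org)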
run_and_report "Running bundle install" \
- $COMMAND_PREFIX bundle install --path $SHARED_PATH/vendor_bundle --local --quiet
+ $COMMAND_PREFIX bin/bundle install --local --quiet
echo -n "Ensuring directory and file permissions ..."
# Ensure correct ownership of a few files
# warn about config errors (deprecated/removed keys from
# previous version, etc)
run_and_report "Checking configuration for completeness" \
- $COMMAND_PREFIX bundle exec rake config:check || APPLICATION_READY=0
+ $COMMAND_PREFIX bin/rake config:check || APPLICATION_READY=0
else
APPLICATION_READY=0
fi
elif ! [[ "$2" =~ (.*)-(.*) ]]; then
echo >&2 "FATAL: --build-version '$2' does not include an iteration. Try '${2}-1'?"
exit 1
+ elif ! [[ "$2" =~ ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$ ]]; then
+ echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$"
+ exit 1
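+    # Examples of build versions accepted by the pattern above (illustrative only):
+    #   2.3.0-1   2.3.0.4-2   2.3.0~rc3-1   2.3.0~dev20210722-1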
else
ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}"
ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}"
arvados-client
arvados-controller
arvados-dispatch-cloud
+ arvados-dispatch-lsf
arvados-docker-cleaner
arvados-git-httpd
arvados-health
"Arvados cluster controller daemon"
package_go_binary cmd/arvados-server arvados-dispatch-cloud \
"Arvados cluster cloud dispatch"
+package_go_binary cmd/arvados-server arvados-dispatch-lsf \
+ "Dispatch Arvados containers to an LSF cluster"
package_go_binary services/arv-git-httpd arvados-git-httpd \
"Provide authenticated http access to Arvados-hosted git repositories"
package_go_binary services/crunch-dispatch-local crunch-dispatch-local \
mv /tmp/x /etc/arvados/config.yml
perl -p -i -e 'BEGIN{undef $/;} s/WebDAV(.*?):\n( *)ExternalURL: ""/WebDAV$1:\n$2ExternalURL: "example.com"/g' /etc/arvados/config.yml
- ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake npm:install >"$STDOUT_IF_DEBUG"
- ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake assets:precompile >"$STDOUT_IF_DEBUG"
+ ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bin/rake npm:install >"$STDOUT_IF_DEBUG"
+ ARVADOS_CONFIG=none RAILS_ENV=production RAILS_GROUPS=assets bin/rake assets:precompile >"$STDOUT_IF_DEBUG"
# Remove generated configuration files so they don't go in the package.
rm -rf /etc/arvados/
elif [[ "$FORMAT" == "deb" ]]; then
declare -A dd
dd[debian10]=buster
- dd[ubuntu1604]=xenial
+ dd[debian11]=bullseye
dd[ubuntu1804]=bionic
dd[ubuntu2004]=focal
D=${dd[$TARGET]}
LICENSE_STRING=`grep license $WORKSPACE/$PKG_DIR/setup.py|cut -f2 -d=|sed -e "s/[',\\"]//g"`
COMMAND_ARR+=('--license' "$LICENSE_STRING")
+ if [[ "$FORMAT" == "rpm" ]]; then
+ # Make sure to conflict with the old rh-python36 packages we used to publish
+ COMMAND_ARR+=('--conflicts' "rh-python36-python-$PKG")
+ fi
+
if [[ "$DEBUG" != "0" ]]; then
COMMAND_ARR+=('--verbose' '--log' 'info')
fi
fi
# the python3-arvados-cwl-runner package comes with cwltool, expose that version
- if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/$python/dist/python-arvados-cwl-runner/bin/cwltool" ]]; then
- COMMAND_ARR+=("usr/share/$python/dist/python-arvados-cwl-runner/bin/cwltool=/usr/bin/")
+ if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/$python/dist/$PYTHON_PKG/bin/cwltool" ]]; then
+ COMMAND_ARR+=("usr/share/$python/dist/$PYTHON_PKG/bin/cwltool=/usr/bin/")
fi
COMMAND_ARR+=(".")
|| fatal 'rvm gemset setup'
rvm env
- (bundle version | grep -q 2.0.2) || gem install bundler -v 2.0.2
+ (bundle version | grep -q 2.2.19) || gem install bundler -v 2.2.19
bundle="$(which bundle)"
echo "$bundle"
- "$bundle" version | grep 2.0.2 || fatal 'install bundler'
+ "$bundle" version | grep 2.2.19 || fatal 'install bundler'
else
# When our "bundle install"s need to install new gems to
# satisfy dependencies, we want them to go where "gem install
(
export HOME=$GEMHOME
bundlers="$(gem list --details bundler)"
- versions=(1.16.6 1.17.3 2.0.2)
+ versions=(2.2.19)
for v in ${versions[@]}; do
if ! echo "$bundlers" | fgrep -q "($v)"; then
gem install --user $(for v in ${versions[@]}; do echo bundler:${v}; done)
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+[Unit]
+Description=arvados-dispatch-lsf
+Documentation=https://doc.arvados.org/
+After=network.target
+AssertPathExists=/etc/arvados/config.yml
+
+# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
+StartLimitIntervalSec=0
+
+[Service]
+Type=notify
+EnvironmentFile=-/etc/arvados/environment
+ExecStart=/usr/bin/arvados-dispatch-lsf
+# Set a reasonable default for the open file limit
+LimitNOFILE=65536
+Restart=always
+RestartSec=1
+
+# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
+StartLimitInterval=0
+
+[Install]
+WantedBy=multi-user.target
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
"git.arvados.org/arvados.git/lib/crunchrun"
"git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/lib/install"
+ "git.arvados.org/arvados.git/lib/lsf"
"git.arvados.org/arvados.git/lib/recovercollection"
"git.arvados.org/arvados.git/services/ws"
)
"controller": controller.Command,
"crunch-run": crunchrun.Command,
"dispatch-cloud": dispatchcloud.Command,
+ "dispatch-lsf": lsf.DispatchCommand,
"install": install.Command,
"init": install.InitCommand,
"recover-collection": recovercollection.Command,
- install/crunch2-slurm/configure-slurm.html.textile.liquid
- install/crunch2-slurm/install-compute-node.html.textile.liquid
- install/crunch2-slurm/install-test.html.textile.liquid
+ - Containers API (lsf):
+ - install/crunch2-lsf/install-dispatch.html.textile.liquid
- Additional configuration:
+ - install/singularity.html.textile.liquid
- install/container-shell-access.html.textile.liquid
- External dependencies:
- install/install-postgresql.html.textile.liquid
<notextile>
<pre><code>~$ <span class="userinput">cd /var/www/arvados-api/current</span>
-$ <span class="userinput">sudo -u <b>webserver-user</b> RAILS_ENV=production bundle exec script/create_superuser_token.rb</span>
+$ <span class="userinput">sudo -u <b>webserver-user</b> RAILS_ENV=production bin/bundle exec script/create_superuser_token.rb</span>
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
</code></pre>
</notextile>
This template recognizes four variables:
* railshost: The hostname included in the prompt, to let the user know where to run the command. If this is the empty string, no hostname will be displayed. Default "apiserver".
* railsdir: The directory included in the prompt, to let the user know where to run the command. Default "/var/www/arvados-api/current".
-* railscmd: The full command to run. Default "bundle exec rails console".
+* railscmd: The full command to run. Default "bin/rails console".
* railsout: The expected output of the command, if any.
{% endcomment %}
Change *@webserver-user@* to the user that runs your web server process. If you install Phusion Passenger as we recommend, this is *@www-data@* on Debian-based systems, and *@nginx@* on Red Hat-based systems.
{% endunless %}
{% unless railscmd %}
- {% assign railscmd = "bundle exec rails console" %}
+ {% assign railscmd = "bin/rails console" %}
{% endunless %}
<notextile>
h3. Debian and Ubuntu
-Ubuntu 16.04 (xenial) ships with Ruby 2.3, which is not supported by Arvados. Use "RVM":#rvm to install Ruby 2.5 or later.
-
Debian 10 (buster) and Ubuntu 18.04 (bionic) and later ship with Ruby 2.5, which is supported by Arvados.
<notextile>
Change to the API server directory and use the following commands:
<pre>
-$ RAILS_ENV=production bundle exec rake config:migrate > config.yml
+$ RAILS_ENV=production bin/rake config:migrate > config.yml
$ cp config.yml /etc/arvados/config.yml
</pre>
If you wish to update @config.yml@ configuration by hand, or check that everything has been migrated, use @config:diff@ to print configuration items that differ between @application.yml@ and the system @config.yml@.
<pre>
-$ RAILS_ENV=production bundle exec rake config:diff
+$ RAILS_ENV=production bin/rake config:diff
</pre>
This command will also report if no migrations are required.
Change to the workbench server directory and use the following commands:
<pre>
-$ RAILS_ENV=production bundle exec rake config:migrate > config.yml
+$ RAILS_ENV=production bin/rake config:migrate > config.yml
$ cp config.yml /etc/arvados/config.yml
</pre>
If you wish to update @config.yml@ configuration by hand, or check that everything has been migrated, use @config:diff@ to print configuration items that differ between @application.yml@ and the system @config.yml@.
<pre>
-$ RAILS_ENV=production bundle exec rake config:diff
+$ RAILS_ENV=production bin/rake config:diff
</pre>
This command will also report if no migrations are required.
Storage classes (alternately known as "storage tiers") allow you to control which volumes should be used to store particular collection data blocks. This can be used to implement data storage policies such as moving data to archival storage.
-The storage classes for each volume are set in the per-volume "keepstore configuration":{{site.baseurl}}/install/install-keepstore.html
+In the default Arvados configuration, with no storage classes specified in the configuration file, all volumes belong to a single implicit storage class called "default". Apart from that, names of storage classes are internal to the cluster and decided by the administrator. Other than the implicit "default" class, Arvados currently does not define any standard storage class names.
+
+To use multiple storage classes, update the @StorageClasses@ and @Volumes@ sections of your configuration file.
+* Every storage class you use (including "default") must be defined in the @StorageClasses@ section.
+* The @StorageClasses@ section must use @Default: true@ to indicate at least one default storage class. When a client/user does not specify storage classes when creating a new collection, the default storage classes are used implicitly.
+* If some storage classes are faster or cheaper to access than others, assign a higher @Priority@ to the faster ones. When reading data, volumes with high priority storage classes are searched first.
+
+Example:
<pre>
+ StorageClasses:
+
+ default:
+ # When reading a block that is stored on multiple volumes,
+ # prefer a volume with this class.
+ Priority: 20
+
+ # When a client does not specify a storage class when saving a
+ # new collection, use this one.
+ Default: true
+
+ archival:
+ Priority: 10
+
Volumes:
+
ClusterID-nyw5e-000000000000000:
# This volume is in the "default" storage class.
StorageClasses:
default: true
+
ClusterID-nyw5e-000000000000001:
- # Specify this volume is in the "archival" storage class.
+ # This volume is in the "archival" storage class.
StorageClasses:
archival: true
</pre>
-Names of storage classes are internal to the cluster and decided by the administrator. Aside from "default", Arvados currently does not define any standard storage class names.
+Refer to the "configuration reference":{{site.baseurl}}/admin/config.html for more details.
h3. Using storage classes
The @db:check_long_lived_tokens@ task will list which users have tokens with no expiration date.
<notextile>
-<pre><code># <span class="userinput">bundle exec rake db:check_long_lived_tokens</span>
+<pre><code># <span class="userinput">bin/rake db:check_long_lived_tokens</span>
Found 6 long-lived tokens from users:
user2,user2@example.com,zzzzz-tpzed-5vzt5wc62k46p6r
admin,admin@example.com,zzzzz-tpzed-6drplgwq9nm5cox
To apply the new policy to existing tokens, use the @db:fix_long_lived_tokens@ task.
<notextile>
-<pre><code># <span class="userinput">bundle exec rake db:fix_long_lived_tokens</span>
+<pre><code># <span class="userinput">bin/rake db:fix_long_lived_tokens</span>
Setting token expiration to: 2020-08-25 03:30:50 +0000
6 tokens updated.
</code></pre>
"Upgrading from 2.2.0":#v2_2_0
-h3. crunch-dispatch-local now requires config.yml
+h3. Storage classes must be defined explicitly
-The @crunch-dispatch-local@ dispatcher now reads the API host and token from the system wide @/etc/arvados/config.yml@ . It will fail to start that file is not found or not readable.
+If your configuration uses the StorageClasses attribute on any Keep volumes, you must add a new @StorageClasses@ section that lists all of your storage classes. Refer to the updated documentation about "configuring storage classes":{{site.baseurl}}/admin/storage-classes.html for details.
-h2(#v2_2_0). v2.2.0 (2021-06-03)
+h3. keep-balance requires access to PostgreSQL
-"Upgrading from 2.1.0":#v2_1_0
+Make sure the keep-balance process can connect to your PostgreSQL server using the settings in your config file. (In previous versions, keep-balance accessed the database through controller instead of connecting to the database server directly.)
+
+h3. crunch-dispatch-local now requires config.yml
+
+The @crunch-dispatch-local@ dispatcher now reads the API host and token from the system-wide @/etc/arvados/config.yml@. It will fail to start if that file is not found or not readable.
h3. Multi-file docker image collections
Typically a docker image collection contains a single @.tar@ file at the top level. Handling of atypical cases has changed. If a docker image collection contains files with extensions other than @.tar@, they will be ignored (previously they could cause errors). If a docker image collection contains multiple @.tar@ files, it will cause an error at runtime, "cannot choose from multiple tar files in image collection" (previously one of the @.tar@ files was selected). Subdirectories are ignored. The @arv keep docker@ command always creates a collection with a single @.tar@ file, and never uses subdirectories, so this change will not affect most users.
+h2(#v2_2_0). v2.2.0 (2021-06-03)
+
+"Upgrading from 2.1.0":#v2_1_0
+
h3. New spelling of S3 credential configs
If you use the S3 driver for Keep volumes and specify credentials in your configuration file (as opposed to using an IAM role), you should change the spelling of the @AccessKey@ and @SecretKey@ config keys to @AccessKeyID@ and @SecretAccessKey@. If you don't update them, the previous spellings will still be accepted, but warnings will be logged at server startup.
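+
+For example, a minimal sketch of the affected volume section with the new spelling (the volume UUID and key values are placeholders, all other settings elided):
+
+<notextile>
+<pre><code>    Volumes:
+      ClusterID-nyw5e-000000000000000:
+        DriverParameters:
+          AccessKeyID: "xxxxxxxx"
+          SecretAccessKey: "xxxxxxxx"
+</code></pre>
+</notextile>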
When serving files that will render directly in the browser, it is important to properly configure the keep-web service to mitigate cross-site-scripting (XSS) attacks. An HTML page can be stored in a collection. If an attacker causes a victim to visit that page through Workbench, the HTML will be rendered by the browser. If all collections are served at the same domain, the browser will consider collections as coming from the same origin, which will grant access to the same browsing data (cookies and local storage). This would enable malicious Javascript on that page to access Arvados on behalf of the victim.
-This can be mitigated by having separate domains for each collection, or limiting preview to circumstances where the collection is not accessed with the user's regular full-access token. For cluster administrators that understand the risks, this protection can also be turned off.
+This can be mitigated by having separate domains for each collection, or limiting preview to circumstances where the collection is not accessed with the user's regular full-access token. For clusters where this risk is acceptable, this protection can also be turned off by setting the @Collections/TrustAllContent@ configuration flag to true, see the "configuration reference":../admin/config.html for more detail.
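+
+A minimal sketch of that setting in the cluster configuration (the cluster ID is a placeholder, all other settings elided):
+
+<notextile>
+<pre><code>Clusters:
+  ClusterID:
+    Collections:
+      TrustAllContent: true
+</code></pre>
+</notextile>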
The following "same origin" URL patterns are supported for public collections and collections shared anonymously via secret links (i.e., collections which can be served by keep-web without making use of any implicit credentials like cookies). See "Same-origin URLs" below.
This mainly affects Workbench's ability to show inline content, so it should be taken into account when configuring both services' URL schemes.
-You can read more about the definition of a _same-site_ request at the "RFC 6265bis-03 page":https://tools.ietf.org/html/draft-ietf-httpbis-rfc6265bis-03#section-5.2
\ No newline at end of file
+You can read more about the definition of a _same-site_ request at the "RFC 6265bis-03 page":https://tools.ietf.org/html/draft-ietf-httpbis-rfc6265bis-03#section-5.2
--- /dev/null
+---
+layout: default
+navsection: installguide
+title: Install the LSF dispatcher
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+{% include 'notebox_begin_warning' %}
+arvados-dispatch-lsf is only relevant for on-premises clusters that will spool jobs to LSF. Skip this section if you are installing a cloud cluster.
+{% include 'notebox_end' %}
+
+Containers can be dispatched to an LSF cluster. The dispatcher sends work to the cluster using LSF's @bsub@ command, so it works in a variety of LSF configurations.
+
+*LSF support is currently considered experimental.*
+
+Limitations include:
+* Arvados container priority is not propagated to LSF job priority. This can cause inefficient use of compute resources, and even deadlock if there are fewer compute nodes than concurrent Arvados workflows.
+* Combining LSF with docker may not work, depending on LSF configuration and user/group IDs (if LSF only sets up the configured user's primary group ID when executing the crunch-run process on a compute node, it may not have permission to connect to the docker daemon).
+
+In order to run containers, you must choose a user that has permission to set up FUSE mounts and run Singularity/Docker containers on each compute node. This install guide refers to this user as the @crunch@ user. We recommend you create this user on each compute node with the same UID and GID, and add it to the @fuse@ and @docker@ system groups to grant it the necessary permissions. However, you can run the dispatcher under any account with sufficient permissions across the cluster.
+
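+For example, a minimal sketch of setting up such an account on a compute node (the UID/GID value of 1100 is only an example; use values that are unused and consistent across your nodes, and note the @fuse@ and @docker@ groups only exist if the corresponding packages are installed):
+
+<notextile>
+<pre><code># <span class="userinput">groupadd --gid 1100 crunch</span>
+# <span class="userinput">useradd --uid 1100 --gid 1100 --create-home crunch</span>
+# <span class="userinput">usermod -aG fuse,docker crunch</span>
+</code></pre>
+</notextile>
+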
+Set up all of your compute nodes "as you would for a SLURM cluster":../crunch2-slurm/install-compute-node.html.
+
+
+h2(#update-config). Update config.yml
+
+Arvados-dispatch-lsf reads the common configuration file at @/etc/arvados/config.yml@.
+
+Review the following configuration parameters and adjust as needed.
+
+
+h3(#BsubSudoUser). Containers.LSF.BsubSudoUser
+
+arvados-dispatch-lsf uses @sudo@ to execute @bsub@, for example @sudo -E -u crunch bsub [...]@. This means the @crunch@ account must exist on the hosts where LSF jobs run ("execution hosts"), as well as on the host where you are installing the Arvados LSF dispatcher (the "submission host"). To use a user account other than @crunch@, configure @BsubSudoUser@:
+
+<notextile>
+<pre> Containers:
+ LSF:
+ <code class="userinput">BsubSudoUser: <b>lsfuser</b>
+</code></pre>
+</notextile>
+
+Alternatively, you can arrange for the arvados-dispatch-lsf process to run as an unprivileged user that has a corresponding account on all compute nodes, and disable the use of @sudo@ by specifying an empty string:
+
+<notextile>
+<pre> Containers:
+ LSF:
+ # Don't use sudo
+ <code class="userinput">BsubSudoUser: <b>""</b>
+</code></pre>
+</notextile>
+
+
+h3(#SbatchArguments). Containers.LSF.BsubArgumentsList
+
+When arvados-dispatch-lsf invokes @bsub@, you can add arguments to the command by specifying @BsubArgumentsList@. You can use this to send the jobs to specific cluster partitions or add resource requests. Set @BsubArgumentsList@ to an array of strings. For example:
+
+<notextile>
+<pre> Containers:
+ LSF:
+ <code class="userinput">BsubArgumentsList: <b>["-C", "0"]</b></code>
+</pre>
+</notextile>
+
+
+h3(#PollPeriod). Containers.PollInterval
+
+arvados-dispatch-lsf polls the API server periodically for new containers to run. The @PollInterval@ option controls how often this poll happens. Set it to a number followed by one of the time units @s@, @m@, or @h@. For example:
+
+<notextile>
+<pre> Containers:
+ <code class="userinput">PollInterval: <b>10s</b>
+</code></pre>
+</notextile>
+
+
+h3(#ReserveExtraRAM). Containers.ReserveExtraRAM: Extra RAM for jobs
+
+Extra RAM to reserve (in bytes) on each LSF job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero.
+
+Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB@, @EB@, @EiB@ (where @KB@ is 10[^3^], @KiB@ is 2[^10^], @MB@ is 10[^6^], @MiB@ is 2[^20^] and so forth).
+
+<notextile>
+<pre> Containers:
+ <code class="userinput">ReserveExtraRAM: <b>256MiB</b></code>
+</pre>
+</notextile>
+
+
+h3(#CrunchRunCommand-network). Containers.CrunchRunArgumentList: Using host networking for containers
+
+Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups. This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net". If you are experiencing this problem, as a workaround you can disable use of network namespaces by Docker across the cluster. Be aware this reduces container isolation, which may be a security risk.
+
+<notextile>
+<pre> Containers:
+ <code class="userinput">CrunchRunArgumentsList:
+ - <b>"-container-enable-networking=always"</b>
+ - <b>"-container-network-mode=host"</b></code>
+</pre>
+</notextile>
+
+{% assign arvados_component = 'arvados-dispatch-lsf' %}
+
+{% include 'install_packages' %}
+
+{% include 'start_service' %}
+
+{% include 'restart_api' %}
h3(#ReserveExtraRAM). Containers.ReserveExtraRAM: Extra RAM for jobs
-Extra RAM to reserve (in bytes) on each Slurm job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero. Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process. In this situation, at least 256MiB is recommended to accomodate each container's @crunch-run@ and @arv-mount@ processes.
+Extra RAM to reserve (in bytes) on each Slurm job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero. Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process. In this situation, at least 256MiB is recommended to accommodate each container's @crunch-run@ and @arv-mount@ processes.
Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB@, @EB@, @EiB@ (where @KB@ is 10[^3^], @KiB@ is 2[^10^], @MB@ is 10[^6^], @MiB@ is 2[^20^] and so forth).
h3. Test configuration
-notextile. <pre><code>$ <span class="userinput">sudo -u git -i bash -c 'cd /var/www/arvados-api/current && bundle exec script/arvados-git-sync.rb production'</span></code></pre>
+notextile. <pre><code>$ <span class="userinput">sudo -u git -i bash -c 'cd /var/www/arvados-api/current && bin/bundle exec script/arvados-git-sync.rb production'</span></code></pre>
h3. Enable the synchronization script
Create @/etc/cron.d/arvados-git-sync@ with the following content:
<notextile>
-<pre><code><span class="userinput">*/5 * * * * git cd /var/www/arvados-api/current && bundle exec script/arvados-git-sync.rb production</span>
+<pre><code><span class="userinput">*/5 * * * * git cd /var/www/arvados-api/current && bin/bundle exec script/arvados-git-sync.rb production</span>
</code></pre>
</notextile>
Keep-balance deletes unreferenced and overreplicated blocks from Keep servers, makes additional copies of underreplicated blocks, and moves blocks into optimal locations as needed (e.g., after adding new servers). See "Balancing Keep servers":{{site.baseurl}}/admin/keep-balance.html for usage details.
-Keep-balance can be installed anywhere with network access to Keep services. Typically it runs on the same host as keepproxy.
+Keep-balance can be installed anywhere with network access to Keep services, arvados-controller, and PostgreSQL. Typically it runs on the same host as keepproxy.
*A cluster should have only one instance of keep-balance running at a time.*
{% include 'notebox_begin' %}
-If you are installing keep-balance on an existing system with valuable data, you can run keep-balance in "dry run" mode first and review its logs as a precaution. To do this, edit your keep-balance startup script to use the flags @-commit-pulls=false -commit-trash=false@.
+If you are installing keep-balance on an existing system with valuable data, you can run keep-balance in "dry run" mode first and review its logs as a precaution. To do this, edit your keep-balance startup script to use the flags @-commit-pulls=false -commit-trash=false -commit-confirmed-fields=false@.
{% include 'notebox_end' %}
{% include 'notebox_begin' %}
Whether you choose to serve collections from their own subdomain or from a single domain, it's important to keep in mind that they should be served from the same _site_ as Workbench for the inline previews to work.
-Please check "keep-web's URL pattern guide":/api/keep-web-urls.html#same-site to learn more.
+Please check "keep-web's URL pattern guide":../api/keep-web-urls.html#same-site to learn more.
{% include 'notebox_end' %}
h2. Set InternalURLs
h2(#update-config). Configure anonymous user token
-{% assign railscmd = "bundle exec ./script/get_anonymous_user_token.rb --get" %}
+{% assign railscmd = "bin/bundle exec ./script/get_anonymous_user_token.rb --get" %}
{% assign railsout = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" %}
If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token.
table(table table-bordered table-condensed).
|_. Distribution|_. State|_. Last supported version|
|CentOS 7|Supported|Latest|
+|Debian 11 ("bullseye")|Supported|Latest|
|Debian 10 ("buster")|Supported|Latest|
|Ubuntu 20.04 ("focal")|Supported|Latest|
|Ubuntu 18.04 ("bionic")|Supported|Latest|
h2(#dnstls). DNS entries and TLS certificates
-The following services are normally public-facing and require DNS entries and corresponding TLS certificates. Get certificates from your preferred TLS certificate provider. We recommend using "Let's Encrypt":https://letsencrypt.org/. You can run several services on same node, but each distinct hostname requires its own TLS certificate.
+The following services are normally public-facing and require DNS entries and corresponding TLS certificates. Get certificates from your preferred TLS certificate provider. We recommend using "Let's Encrypt":https://letsencrypt.org/. You can run several services on the same node, but each distinct DNS name requires a valid, matching TLS certificate.
-This guide uses the following hostname conventions. A later part of this guide will describe how to set up Nginx virtual hosts.
+This guide uses the following DNS name conventions. A later part of this guide will describe how to set up Nginx virtual hosts.
<div class="offset1">
table(table table-bordered table-condensed).
-|_. Function|_. Hostname|
+|_. Function|_. DNS name|
|Arvados API|@ClusterID.example.com@|
|Arvados Git server|git.@ClusterID.example.com@|
+|Arvados Webshell|webshell.@ClusterID.example.com@|
|Arvados Websockets endpoint|ws.@ClusterID.example.com@|
|Arvados Workbench|workbench.@ClusterID.example.com@|
|Arvados Workbench 2|workbench2.@ClusterID.example.com@|
|Arvados Keepproxy server|keep.@ClusterID.example.com@|
|Arvados Keep-web server|download.@ClusterID.example.com@
_and_
-*.collections.@ClusterID.example.com@ or
-*<notextile>--</notextile>collections.@ClusterID.example.com@ or
+*.collections.@ClusterID.example.com@ _or_
+*<notextile>--</notextile>collections.@ClusterID.example.com@ _or_
collections.@ClusterID.example.com@ (see the "keep-web install docs":install-keep-web.html)|
</div>
+Setting up Arvados is easiest when wildcard TLS and wildcard DNS are available. It is also possible to set up Arvados without them, but not having a wildcard for @keep-web@ (i.e. not having *.collections.@ClusterID.example.com@) comes with a tradeoff: it disables some features that allow users to view Arvados-hosted data in their browsers. This tradeoff is a consequence of the CORS rules applied by modern browsers; more information is available in the "keep-web URL pattern guide":../api/keep-web-urls.html.
+
+The table below lists the required TLS certificates and DNS names in each scenario.
+
+<div class="offset1">
+table(table table-bordered table-condensed).
+||_. Wildcard TLS and DNS available|_. Wildcard TLS available|_. Other|
+|TLS|*.@ClusterID.example.com@
+@ClusterID.example.com@
+*.collections.@ClusterID.example.com@|*.@ClusterID.example.com@
+@ClusterID.example.com@|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+collections.@ClusterID.example.com@|
+|DNS|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+*.collections.@ClusterID.example.com@|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+collections.@ClusterID.example.com@|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+collections.@ClusterID.example.com@|
+</div>
+
{% include 'notebox_begin' %}
It is also possible to create your own certificate authority, issue server certificates, and install a custom root certificate in the browser. This is out of scope for this guide.
{% include 'notebox_end' %}
table(table table-bordered table-condensed).
|_. OS version|_. Command|
+|Debian 11 ("bullseye")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/bullseye bullseye main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Debian 10 ("buster")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/buster buster main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Ubuntu 20.04 ("focal")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/focal focal main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Ubuntu 18.04 ("bionic")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/bionic bionic main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
--- /dev/null
+---
+layout: default
+navsection: installguide
+title: Singularity container runtime
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+Arvados can be configured to use "Singularity":https://sylabs.io/singularity/ instead of Docker to execute containers on cloud nodes or a SLURM/LSF cluster. Singularity may be preferable due to its simpler installation and the lack of a long-running daemon process and special system users/groups.
+
+Please note:
+* *Singularity support is currently considered experimental.*
+* Even when using the singularity runtime, users' container images are expected to be saved in Docker format using @arv keep docker@. Arvados converts the Docker image to Singularity format (@.sif@) at runtime as needed. Specifying a @.sif@ file as an image when submitting a container request is not yet supported.
+* Singularity does not limit the amount of memory available in a container. Each container will have access to all memory on the host where it runs, unless memory use is restricted by SLURM/LSF.
+* Programs running in containers may behave differently due to differences between Singularity and Docker.
+** The root (image) filesystem is read-only in a Singularity container. Programs that attempt to write outside a designated output or temporary directory are likely to fail.
+** The Docker ENTRYPOINT instruction is ignored.
+* Arvados is currently tested with Singularity version 3.5.2.
+
+To use singularity, first make sure "Singularity is installed":https://sylabs.io/guides/3.5/user-guide/quick_start.html on your cloud worker image or SLURM/LSF compute nodes as applicable. Note @squashfs-tools@ is required.
+
+<notextile>
+<pre><code>$ <span class="userinput">singularity version</span>
+3.5.2
+$ <span class="userinput">mksquashfs -version</span>
+mksquashfs version 4.3-git (2014/06/09)
+[...]
+</code></pre>
+</notextile>
+
+Then update @Containers.RuntimeEngine@ in your cluster configuration:
+
+<notextile>
+<pre><code> # Container runtime: "docker" (default) or "singularity" (experimental)
+ RuntimeEngine: singularity
+</code></pre>
+</notextile>
+
+Restart your dispatcher (@crunch-dispatch-slurm@, @arvados-dispatch-cloud@, or @arvados-dispatch-lsf@) after updating your configuration file.
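+
+For example, on a systemd-based host running the cloud dispatcher (substitute the dispatcher service your cluster actually uses):
+
+<notextile>
+<pre><code># <span class="userinput">systemctl restart arvados-dispatch-cloud</span>
+</code></pre>
+</notextile>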
{% codeblock as yaml %}
hints:
arv:RunInSingleContainer: {}
+
arv:RuntimeConstraints:
keep_cache: 123456
outputDirType: keep_output_dir
+
arv:PartitionRequirement:
partition: dev_partition
+
arv:APIRequirement: {}
- cwltool:LoadListingRequirement:
- loadListing: shallow_listing
+
arv:IntermediateOutput:
outputTTL: 3600
- arv:ReuseRequirement:
- enableReuse: false
+
cwltool:Secrets:
secrets: [input1, input2]
- cwltool:TimeLimit:
- timelimit: 14400
+
arv:WorkflowRunnerResources:
ramMin: 2048
coresMin: 2
keep_cache: 512
+
arv:ClusterTarget:
cluster_id: clsr1
project_uuid: clsr1-j7d0g-qxc4jcji7n4lafx
+
+ arv:OutputStorageClass:
+ intermediateStorageClass: fast_storage
+ finalStorageClass: robust_storage
{% endcodeblock %}
h2(#RunInSingleContainer). arv:RunInSingleContainer
table(table table-bordered table-condensed).
|_. Field |_. Type |_. Description |
|outputTTL|int|If the value is greater than zero, consider intermediate output collections to be temporary and should be automatically trashed. Temporary collections will be trashed @outputTTL@ seconds after creation. A value of zero means intermediate output should be retained indefinitely (this is the default behavior).
-Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire the workflow.|
+Note: arvados-cwl-runner currently does not take workflow dependencies into account when setting the TTL on an intermediate output collection. If the TTL is too short, it is possible for a collection to be trashed before downstream steps that consume it are started. The recommended minimum value for TTL is the expected duration of the entire workflow.|
h2. cwltool:Secrets
|cluster_id|string|The five-character alphanumeric cluster id (uuid prefix) where a container or subworkflow will execute. May be an expression.|
|project_uuid|string|The uuid of the project which will own container request and output of the container. May be an expression.|
+h2(#OutputStorageClass). arv:OutputStorageClass
+
+Specify the "storage class":{{site.baseurl}}/user/topics/storage-classes.html to use for intermediate and final outputs.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|intermediateStorageClass|string or array of strings|The storage class for output of intermediate steps. For example, faster "hot" storage.|
+|finalStorageClass|string or array of strings|The storage class for the final output.|
+
h2. arv:dockerCollectionPDH
This is an optional extension field appearing on the standard @DockerRequirement@. It specifies the portable data hash of the Arvados collection containing the Docker image. If present, it takes precedence over @dockerPull@ or @dockerImageId@.
The following extensions are deprecated because equivalent features are part of the CWL v1.1 standard.
+{% codeblock as yaml %}
+hints:
+ cwltool:LoadListingRequirement:
+ loadListing: shallow_listing
+ arv:ReuseRequirement:
+ enableReuse: false
+ cwltool:TimeLimit:
+ timelimit: 14400
+{% endcodeblock %}
+
h2. cwltool:LoadListingRequirement
For CWL v1.1 scripts, this is deprecated in favor of "loadListing":https://www.commonwl.org/v1.1/CommandLineTool.html#CommandInputParameter or "LoadListingRequirement":https://www.commonwl.org/v1.1/CommandLineTool.html#LoadListingRequirement
|==--no-wait==| Submit workflow runner and exit.|
|==--log-timestamps==| Prefix logging lines with timestamp|
|==--no-log-timestamps==| No timestamp on logging lines|
-|==--api== {containers}|Select work submission API. Only supports 'containers'|
|==--compute-checksum==| Compute checksum of contents while collecting outputs|
|==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner (default 1024)|
|==--submit-runner-image== SUBMIT_RUNNER_IMAGE|Docker image for workflow runner|
|==--always-submit-runner==|When invoked with --submit --wait, always submit a runner to manage the workflow, even when only running a single CommandLineTool|
-|==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one (containers API only).|
-|==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster (containers API only)|
+|==--submit-request-uuid== UUID|Update and commit to supplied container request instead of creating a new one.|
+|==--submit-runner-cluster== CLUSTER_ID|Submit workflow runner to a remote cluster|
|==--name NAME==|Name to use for workflow execution instance.|
|==--on-error== {stop,continue}|Desired workflow behavior when a step fails. One of 'stop' (do not submit any more steps) or 'continue' (may submit other steps that are not downstream from the error). Default is 'continue'.|
|==--enable-dev==|Enable loading and running development versions of CWL spec.|
-|==--storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving workflow output to Keep.|
+|==--storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving the final workflow output to Keep.|
+|==--intermediate-storage-classes== STORAGE_CLASSES|Specify comma separated list of storage classes to be used when saving intermediate workflow output to Keep.|
|==--intermediate-output-ttl== N|If N > 0, intermediate output collections will be trashed N seconds after creation. Default is 0 (don't trash).|
-|==--priority== PRIORITY|Workflow priority (range 1..1000, higher has precedence over lower, containers api only)|
+|==--priority== PRIORITY|Workflow priority (range 1..1000, higher has precedence over lower)|
|==--thread-count== THREAD_COUNT|Number of threads to use for container submit and output collection.|
|==--http-timeout== HTTP_TIMEOUT|API request timeout in seconds. Default is 300 seconds (5 minutes).|
|==--trash-intermediate==|Immediately trash intermediate outputs on workflow success.|
The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in an existing project, you can specify the project you want it to be in using the tag @--project-uuid@ followed by the project uuid.
-For example, this will copy the collection to project dstcl-j7d0g-a894213ukjhal12 in the destination cluster.
+For example, this will copy the collection to project @dstcl-j7d0g-a894213ukjhal12@ in the destination cluster.
<notextile> <pre><code>~$ <span class="userinput">arv-copy --src pirca --dst dstcl --project-uuid dstcl-j7d0g-a894213ukjhal12 jutro-4zz18-tv416l321i4r01e
</code></pre>
</notextile>
+Additionally, if you need to specify the storage classes to use when saving the copied data on the destination cluster, you can do that with the @--storage-classes LIST@ argument, where @LIST@ is a comma-separated list of storage class names.
+
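+For example, this will copy the same collection as above, but save it under a (hypothetical) storage class named @fast_storage@ on the destination cluster:
+
+<notextile> <pre><code>~$ <span class="userinput">arv-copy --src pirca --dst dstcl --project-uuid dstcl-j7d0g-a894213ukjhal12 --storage-classes fast_storage jutro-4zz18-tv416l321i4r01e</span>
+</code></pre>
+</notextile>
+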
h3. How to copy a workflow
We will use the uuid @jutro-7fd4e-mkmmq53m1ze6apx@ as an example workflow.
---
layout: default
navsection: userguide
-title: "Working with Docker images"
+title: "Working with container images"
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-This page describes how to set up the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a workflow step will be run in using "Docker.":https://www.docker.com/ Docker is a tool for building and running containers that isolate applications from other applications running on the same node. For detailed information about Docker, see the "Docker User Guide.":https://docs.docker.com/userguide/
+This page describes how to set up the runtime environment (e.g., the programs, libraries, and other dependencies needed to run a job) that a workflow step will be run in using "Docker":https://www.docker.com/ or "Singularity":https://sylabs.io/singularity/. Docker and Singularity are tools for building and running containers that isolate applications from other applications running on the same node. For detailed information, see the "Docker User Guide":https://docs.docker.com/userguide/ and the "Introduction to Singularity":https://sylabs.io/guides/3.5/user-guide/introduction.html.
+
+Note that Arvados always works with Docker images, even when it is configured to use Singularity to run containers. There are some differences between the two runtimes that can affect your containers. See the "Singularity container runtime":{{site.baseurl}}/install/singularity.html page for details.
This page describes:
{% include 'tutorial_expectations_workstation' %}
-You also need ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker. You can test this by running @docker version@. If you receive a permission denied error, your user account may need to be added to the @docker@ group. If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin.
+You also need to ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker. You can test this by running @docker version@. If you receive a permission denied error, your user account may need to be added to the @docker@ group. If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin.
h2(#create). Create a custom image using a Dockerfile
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Storage classes (alternately known as "storage tiers") allow you to control which volumes should be used to store particular collection data blocks. This can be used to implement data storage policies such as moving data to archival storage.
+Storage classes (sometimes called "storage tiers") allow you to control which back-end storage volumes should be used to store the data blocks of a particular collection. This can be used to implement data storage policies such as assigning data collections to "fast", "robust" or "archival" storage.
-Names of storage classes are internal to the cluster and decided by the administrator. Aside from "default", Arvados currently does not define any standard storage class names.
+Names of storage classes are internal to the cluster and decided by the administrator. Aside from "default", Arvados currently does not define any standard storage class names. Consult your cluster administrator for guidance on what storage classes are available to use on your specific Arvados instance.
+
+Note that a change to an existing collection's storage class does not take effect immediately: the blocks are asynchronously copied to the new storage class and removed from the old one. The collection field "storage_classes_confirmed" is updated to reflect when data blocks have been successfully copied.
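+
+For example, you can check whether the move has completed by looking at the @storage_classes_confirmed@ field of the collection record (the UUID below is a placeholder):
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv collection get --uuid zzzzz-4zz18-xxxxxxxxxxxxxxx</span>
+</code></pre>
+</notextile>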
h3. arv-put
h3. arvados-cwl-runner
-You may also specify the desired storage class for the final output collection produced by @arvados-cwl-runner@:
+You may specify the desired storage class for the intermediate and final output collections produced by @arvados-cwl-runner@ on the command line or using the "arv:OutputStorageClass hint":{{site.baseurl}}/user/cwl/cwl-extensions.html#OutputStorageClass .
<pre>
-$ arvados-cwl-runner --storage-classes=hot myworkflow.cwl myinput.yml
+$ arvados-cwl-runner --intermediate-storage-classes=hot_storage --storage-classes=robust_storage myworkflow.cwl myinput.yml
</pre>
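+
+You can also request storage classes from within the workflow document itself using the "arv:OutputStorageClass hint":{{site.baseurl}}/user/cwl/cwl-extensions.html#OutputStorageClass . A sketch (this assumes the @arv@ namespace, @http://arvados.org/cwl#@, is declared in the workflow's @$namespaces@ section; the storage class names here are examples):
+
+<pre>
+hints:
+  arv:OutputStorageClass:
+    intermediateStorageClass: hot_storage
+    finalStorageClass: robust_storage
+</pre>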
-(Note: intermediate collections produced by a workflow run will have "default" storage class.)
-
h3. arv command line
You may set the storage class on an existing collection by setting the "storage_classes_desired" field of a Collection. For example, at the command line:
h3. Storage class notes
-Collection blocks will be in the "default" storage class if not otherwise specified.
+Collection blocks will be in the cluster's configured default storage class(es) if not otherwise specified.
Any user with write access to a collection may set any storage class on that collection.
-
-Names of storage classes are internal to the cluster and decided by the administrator. Aside from "default", Arvados currently does not define any standard storage class names.
if err != nil {
return err
}
- for _, version := range []string{"1.16.6", "1.17.3", "2.0.2"} {
+ for _, version := range []string{"2.2.19"} {
if !strings.Contains(buf.String(), "("+version+")") {
- err = super.RunProgram(ctx, runner.src, runOptions{}, "gem", "install", "--user", "--conservative", "--no-document", "bundler:1.16.6", "bundler:1.17.3", "bundler:2.0.2")
+ err = super.RunProgram(ctx, runner.src, runOptions{}, "gem", "install", "--user", "--conservative", "--no-document", "bundler:2.2.19")
if err != nil {
return err
}
},
}
}
+ cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+ "default": {Default: true},
+ "foo": {},
+ "bar": {},
+ }
}
if super.OwnTemporaryDatabase {
cluster.PostgreSQL.Connection = arvados.PostgreSQLConnection{
DispatchCloud:
InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
+ DispatchLSF:
+ InternalURLs: {SAMPLE: {}}
+ ExternalURL: "-"
Keepproxy:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
AdminNotifierEmailFrom: arvados@example.com
EmailSubjectPrefix: "[ARVADOS] "
UserNotifierEmailFrom: arvados@example.com
+ UserNotifierEmailBcc: {}
NewUserNotificationRecipients: {}
NewInactiveUserNotificationRecipients: {}
# long-running balancing operation.
BalanceTimeout: 6h
+ # Maximum number of replication_confirmed /
+ # storage_classes_confirmed updates to write to the database
+ # after a rebalancing run. When many updates are needed, this
+ # spreads them over a few runs rather than applying them all at
+ # once.
+ BalanceUpdateLimit: 100000
+
# Default lifetime for ephemeral collections: 2 weeks. This must not
# be less than BlobSigningTTL.
DefaultTrashLifetime: 336h
# WebDAV would have to expose XSS vulnerabilities in order to
# handle the redirect (see discussion on Services.WebDAV).
#
- # This setting has no effect in the recommended configuration,
- # where the WebDAV is configured to have a separate domain for
- # every collection; in this case XSS protection is provided by
- # browsers' same-origin policy.
+ # This setting has no effect in the recommended configuration, where the
+ # WebDAV service is configured to have a separate domain for every
+ # collection and XSS protection is provided by browsers' same-origin
+ # policy.
#
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
# (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
AssignNodeHostname: "compute%<slot_number>d"
+ LSF:
+ # Additional arguments to bsub when submitting Arvados
+ # containers as LSF jobs.
+ BsubArgumentsList: []
+
+ # Use sudo to switch to this user account when submitting LSF
+ # jobs.
+ #
+ # This account must exist on the hosts where LSF jobs run
+ # ("execution hosts"), as well as on the host where the
+ # Arvados LSF dispatcher runs ("submission host").
+ BsubSudoUser: "crunch"
+
JobsAPI:
# Enable the legacy 'jobs' API (crunch v1). This value must be a string.
#
Price: 0.1
Preemptible: false
+ StorageClasses:
+
+ # If you use multiple storage classes, specify them here, using
+ # the storage class name as the key (in place of "SAMPLE" in
+ # this sample entry).
+ #
+ # Further info/examples:
+ # https://doc.arvados.org/admin/storage-classes.html
+ SAMPLE:
+
+ # Priority determines the order volumes should be searched
+ # when reading data, in cases where a keepstore server has
+ # access to multiple volumes with different storage classes.
+ Priority: 0
+
+ # Default determines which storage class(es) should be used
+ # when a user/client writes data or saves a new collection
+ # without specifying storage classes.
+ #
+ # If any StorageClasses are configured, at least one of them
+ # must have Default: true.
+ Default: true
+
Volumes:
SAMPLE:
# AccessViaHosts specifies which keepstore processes can read
ReadOnly: false
Replication: 1
StorageClasses:
- default: true
+ # If you have configured storage classes (see StorageClasses
+ # section above), add an entry here for each storage class
+ # satisfied by this volume.
SAMPLE: true
Driver: S3
DriverParameters:
"Collections.BalanceCollectionBuffers": false,
"Collections.BalancePeriod": false,
"Collections.BalanceTimeout": false,
+ "Collections.BalanceUpdateLimit": false,
"Collections.BlobDeleteConcurrency": false,
"Collections.BlobMissingReport": false,
"Collections.BlobReplicateConcurrency": false,
"Containers.JobsAPI.GitInternalDir": false,
"Containers.Logging": false,
"Containers.LogReuseDecisions": false,
+ "Containers.LSF": false,
"Containers.MaxComputeVMs": false,
"Containers.MaxDispatchAttempts": false,
"Containers.MaxRetryAttempts": true,
"Services.*": true,
"Services.*.ExternalURL": true,
"Services.*.InternalURLs": false,
+ "StorageClasses": true,
+ "StorageClasses.*": true,
+ "StorageClasses.*.Default": true,
+ "StorageClasses.*.Priority": true,
"SystemLogs": false,
"SystemRootToken": false,
"TLS": false,
"Users.NewUsersAreActive": false,
"Users.PreferDomainForUsername": false,
"Users.UserNotifierEmailFrom": false,
+ "Users.UserNotifierEmailBcc": false,
"Users.UserProfileNotificationAddress": false,
"Users.UserSetupMailText": false,
"Volumes": true,
DispatchCloud:
InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
+ DispatchLSF:
+ InternalURLs: {SAMPLE: {}}
+ ExternalURL: "-"
Keepproxy:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
AdminNotifierEmailFrom: arvados@example.com
EmailSubjectPrefix: "[ARVADOS] "
UserNotifierEmailFrom: arvados@example.com
+ UserNotifierEmailBcc: {}
NewUserNotificationRecipients: {}
NewInactiveUserNotificationRecipients: {}
# long-running balancing operation.
BalanceTimeout: 6h
+ # Maximum number of replication_confirmed /
+ # storage_classes_confirmed updates to write to the database
+ # after a rebalancing run. When many updates are needed, this
+ # spreads them over a few runs rather than applying them all at
+ # once.
+ BalanceUpdateLimit: 100000
+
# Default lifetime for ephemeral collections: 2 weeks. This must not
# be less than BlobSigningTTL.
DefaultTrashLifetime: 336h
# WebDAV would have to expose XSS vulnerabilities in order to
# handle the redirect (see discussion on Services.WebDAV).
#
- # This setting has no effect in the recommended configuration,
- # where the WebDAV is configured to have a separate domain for
- # every collection; in this case XSS protection is provided by
- # browsers' same-origin policy.
+ # This setting has no effect in the recommended configuration, where the
+ # WebDAV service is configured to have a separate domain for every
+ # collection and XSS protection is provided by browsers' same-origin
+ # policy.
#
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
# (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
AssignNodeHostname: "compute%<slot_number>d"
+ LSF:
+ # Additional arguments to bsub when submitting Arvados
+ # containers as LSF jobs.
+ BsubArgumentsList: []
+
+ # Use sudo to switch to this user account when submitting LSF
+ # jobs.
+ #
+ # This account must exist on the hosts where LSF jobs run
+ # ("execution hosts"), as well as on the host where the
+ # Arvados LSF dispatcher runs ("submission host").
+ BsubSudoUser: "crunch"
+
JobsAPI:
# Enable the legacy 'jobs' API (crunch v1). This value must be a string.
#
Price: 0.1
Preemptible: false
+ StorageClasses:
+
+ # If you use multiple storage classes, specify them here, using
+ # the storage class name as the key (in place of "SAMPLE" in
+ # this sample entry).
+ #
+ # Further info/examples:
+ # https://doc.arvados.org/admin/storage-classes.html
+ SAMPLE:
+
+ # Priority determines the order volumes should be searched
+ # when reading data, in cases where a keepstore server has
+ # access to multiple volumes with different storage classes.
+ Priority: 0
+
+ # Default determines which storage class(es) should be used
+ # when a user/client writes data or saves a new collection
+ # without specifying storage classes.
+ #
+ # If any StorageClasses are configured, at least one of them
+ # must have Default: true.
+ Default: true
+
Volumes:
SAMPLE:
# AccessViaHosts specifies which keepstore processes can read
ReadOnly: false
Replication: 1
StorageClasses:
- default: true
+ # If you have configured storage classes (see StorageClasses
+ # section above), add an entry here for each storage class
+ # satisfied by this volume.
SAMPLE: true
Driver: S3
DriverParameters:
ldr.loadOldKeepBalanceConfig,
)
}
+ loadFuncs = append(loadFuncs, ldr.setImplicitStorageClasses)
for _, f := range loadFuncs {
err = f(&cfg)
if err != nil {
checkKeyConflict(fmt.Sprintf("Clusters.%s.PostgreSQL.Connection", id), cc.PostgreSQL.Connection),
ldr.checkEmptyKeepstores(cc),
ldr.checkUnlistedKeepstores(cc),
+ ldr.checkStorageClasses(cc),
// TODO: check non-empty Rendezvous on
// services other than Keepstore
} {
return nil
}
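+// setImplicitStorageClasses fills in the implicit "default" storage
+// class (and assigns it to every volume) for clusters whose config
+// does not mention storage classes anywhere.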
+func (ldr *Loader) setImplicitStorageClasses(cfg *arvados.Config) error {
+cluster:
+ for id, cc := range cfg.Clusters {
+ if len(cc.StorageClasses) > 0 {
+ continue cluster
+ }
+ for _, vol := range cc.Volumes {
+ if len(vol.StorageClasses) > 0 {
+ continue cluster
+ }
+ }
+ // No explicit StorageClasses config info at all; fill
+ // in implicit defaults.
+ for id, vol := range cc.Volumes {
+ vol.StorageClasses = map[string]bool{"default": true}
+ cc.Volumes[id] = vol
+ }
+ cc.StorageClasses = map[string]arvados.StorageClassConfig{"default": {Default: true}}
+ cfg.Clusters[id] = cc
+ }
+ return nil
+}
+
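+// checkStorageClasses returns an error if any volume lists no storage
+// classes, any volume refers to a storage class that is not defined in
+// StorageClasses, or no configured storage class has Default: true. It
+// also warns about configured storage classes that are not provided by
+// any volume.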
+func (ldr *Loader) checkStorageClasses(cc arvados.Cluster) error {
+ classOnVolume := map[string]bool{}
+ for volid, vol := range cc.Volumes {
+ if len(vol.StorageClasses) == 0 {
+ return fmt.Errorf("%s: volume has no StorageClasses listed", volid)
+ }
+ for classid := range vol.StorageClasses {
+ if _, ok := cc.StorageClasses[classid]; !ok {
+ return fmt.Errorf("%s: volume refers to storage class %q that is not defined in StorageClasses", volid, classid)
+ }
+ classOnVolume[classid] = true
+ }
+ }
+ haveDefault := false
+ for classid, sc := range cc.StorageClasses {
+ if !classOnVolume[classid] && len(cc.Volumes) > 0 {
+ ldr.Logger.Warnf("there are no volumes providing storage class %q", classid)
+ }
+ if sc.Default {
+ haveDefault = true
+ }
+ }
+ if !haveDefault {
+ return fmt.Errorf("there is no default storage class (at least one entry in StorageClasses must have Default: true)")
+ }
+ return nil
+}
+
func checkKeyConflict(label string, m map[string]string) error {
saw := map[string]bool{}
for k := range m {
var _ = check.Suite(&LoadSuite{})
+var emptyConfigYAML = `Clusters: {"z1111": {}}`
+
// Return a new Loader that reads cluster config from configdata
// (instead of the usual default /etc/arvados/config.yml), and logs to
// logdst or (if that's nil) c.Log.
}
func (s *LoadSuite) TestNoConfigs(c *check.C) {
- cfg, err := testLoader(c, `Clusters: {"z1111": {}}`, nil).Load()
+ cfg, err := testLoader(c, emptyConfigYAML, nil).Load()
c.Assert(err, check.IsNil)
c.Assert(cfg.Clusters, check.HasLen, 1)
cc, err := cfg.GetCluster("z1111")
f, err = ioutil.TempFile("", "")
c.Check(err, check.IsNil)
defer os.Remove(f.Name())
- io.WriteString(f, "Clusters: {aaaaa: {}}\n")
+ io.WriteString(f, emptyConfigYAML)
newfile := f.Name()
for _, trial := range []struct {
c.Errorf("Should have produced an error")
}
- var logbuf bytes.Buffer
- loader := testLoader(c, string(DefaultYAML), &logbuf)
+ loader := testLoader(c, string(DefaultYAML), nil)
cfg, err := loader.Load()
c.Assert(err, check.IsNil)
if err := checkListKeys("", cfg); err != nil {
c.Error(err)
}
}
+
+func (s *LoadSuite) TestImplicitStorageClasses(c *check.C) {
+ // If StorageClasses and Volumes.*.StorageClasses are all
+ // empty, there is a default storage class named "default".
+ ldr := testLoader(c, `{"Clusters":{"z1111":{}}}`, nil)
+ cfg, err := ldr.Load()
+ c.Assert(err, check.IsNil)
+ cc, err := cfg.GetCluster("z1111")
+ c.Assert(err, check.IsNil)
+ c.Check(cc.StorageClasses, check.HasLen, 1)
+ c.Check(cc.StorageClasses["default"].Default, check.Equals, true)
+ c.Check(cc.StorageClasses["default"].Priority, check.Equals, 0)
+
+ // The implicit "default" storage class is used by all
+ // volumes.
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ Volumes:
+ z: {}`, nil)
+ cfg, err = ldr.Load()
+ c.Assert(err, check.IsNil)
+ cc, err = cfg.GetCluster("z1111")
+ c.Assert(err, check.IsNil)
+ c.Check(cc.StorageClasses, check.HasLen, 1)
+ c.Check(cc.StorageClasses["default"].Default, check.Equals, true)
+ c.Check(cc.StorageClasses["default"].Priority, check.Equals, 0)
+ c.Check(cc.Volumes["z"].StorageClasses["default"], check.Equals, true)
+
+ // The "default" storage class isn't implicit if any classes
+ // are configured explicitly.
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ StorageClasses:
+ local:
+ Default: true
+ Priority: 111
+ Volumes:
+ z:
+ StorageClasses:
+ local: true`, nil)
+ cfg, err = ldr.Load()
+ c.Assert(err, check.IsNil)
+ cc, err = cfg.GetCluster("z1111")
+ c.Assert(err, check.IsNil)
+ c.Check(cc.StorageClasses, check.HasLen, 1)
+ c.Check(cc.StorageClasses["local"].Default, check.Equals, true)
+ c.Check(cc.StorageClasses["local"].Priority, check.Equals, 111)
+
+ // It is an error for a volume to refer to a storage class
+ // that isn't listed in StorageClasses.
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ StorageClasses:
+ local:
+ Default: true
+ Priority: 111
+ Volumes:
+ z:
+ StorageClasses:
+ nx: true`, nil)
+ _, err = ldr.Load()
+ c.Assert(err, check.ErrorMatches, `z: volume refers to storage class "nx" that is not defined.*`)
+
+ // It is an error for a volume to refer to a storage class
+ // that isn't listed in StorageClasses ... even if it's
+ // "default", which would exist implicitly if it weren't
+ // referenced explicitly by a volume.
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ Volumes:
+ z:
+ StorageClasses:
+ default: true`, nil)
+ _, err = ldr.Load()
+ c.Assert(err, check.ErrorMatches, `z: volume refers to storage class "default" that is not defined.*`)
+
+ // If the "default" storage class is configured explicitly, it
+ // is not used implicitly by any volumes, even if it's the
+ // only storage class.
+ var logbuf bytes.Buffer
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ StorageClasses:
+ default:
+ Default: true
+ Priority: 111
+ Volumes:
+ z: {}`, &logbuf)
+ _, err = ldr.Load()
+ c.Assert(err, check.ErrorMatches, `z: volume has no StorageClasses listed`)
+
+ // If StorageClasses are configured explicitly, there must be
+ // at least one with Default: true. (Calling one "default" is
+ // not sufficient.)
+ ldr = testLoader(c, `
+Clusters:
+ z1111:
+ StorageClasses:
+ default:
+ Priority: 111
+ Volumes:
+ z:
+ StorageClasses:
+ default: true`, nil)
+ _, err = ldr.Load()
+ c.Assert(err, check.ErrorMatches, `there is no default storage class.*`)
+}
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "git.arvados.org/arvados.git/sdk/go/httpserver"
check "gopkg.in/check.v1"
)
}
}
+func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
+ conn1 := s.testClusters["z1111"].Conn()
+ rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+ userctx1, ac1, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user@example.com", true)
+
+ coll, err := conn1.CollectionCreate(userctx1, arvados.CreateOptions{})
+ c.Check(err, check.IsNil)
+ specimen, err := conn1.SpecimenCreate(userctx1, arvados.CreateOptions{})
+ c.Check(err, check.IsNil)
+
+ tests := []struct {
+ path string
+ reqIdProvided bool
+ notFoundRequest bool
+ }{
+ {"/arvados/v1/collections", false, false},
+ {"/arvados/v1/collections", true, false},
+ {"/arvados/v1/nonexistant", false, true},
+ {"/arvados/v1/nonexistant", true, true},
+ {"/arvados/v1/collections/" + coll.UUID, false, false},
+ {"/arvados/v1/collections/" + coll.UUID, true, false},
+ {"/arvados/v1/specimens/" + specimen.UUID, false, false},
+ {"/arvados/v1/specimens/" + specimen.UUID, true, false},
+ {"/arvados/v1/collections/z1111-4zz18-0123456789abcde", false, true},
+ {"/arvados/v1/collections/z1111-4zz18-0123456789abcde", true, true},
+ {"/arvados/v1/specimens/z1111-j58dm-0123456789abcde", false, true},
+ {"/arvados/v1/specimens/z1111-j58dm-0123456789abcde", true, true},
+ }
+
+ for _, tt := range tests {
+ c.Log(c.TestName() + " " + tt.path)
+ req, err := http.NewRequest("GET", "https://"+ac1.APIHost+tt.path, nil)
+ c.Assert(err, check.IsNil)
+ customReqId := "abcdeG"
+ if !tt.reqIdProvided {
+ c.Assert(req.Header.Get("X-Request-Id"), check.Equals, "")
+ } else {
+ req.Header.Set("X-Request-Id", customReqId)
+ }
+ resp, err := ac1.Do(req)
+ c.Assert(err, check.IsNil)
+ if tt.notFoundRequest {
+ c.Check(resp.StatusCode, check.Equals, http.StatusNotFound)
+ } else {
+ c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+ }
+ if !tt.reqIdProvided {
+ c.Check(resp.Header.Get("X-Request-Id"), check.Matches, "^req-[0-9a-zA-Z]{20}$")
+ if tt.notFoundRequest {
+ var jresp httpserver.ErrorResponse
+ err := json.NewDecoder(resp.Body).Decode(&jresp)
+ c.Check(err, check.IsNil)
+ c.Assert(jresp.Errors, check.HasLen, 1)
+ c.Check(jresp.Errors[0], check.Matches, "^.*(req-[0-9a-zA-Z]{20}).*$")
+ }
+ } else {
+ c.Check(resp.Header.Get("X-Request-Id"), check.Equals, customReqId)
+ if tt.notFoundRequest {
+ var jresp httpserver.ErrorResponse
+ err := json.NewDecoder(resp.Body).Decode(&jresp)
+ c.Check(err, check.IsNil)
+ c.Assert(jresp.Errors, check.HasLen, 1)
+ c.Check(jresp.Errors[0], check.Matches, "^.*("+customReqId+").*$")
+ }
+ }
+ }
+}
+
// We test the direct access to the database
// normally an integration test would not have a database access, but in this case we need
// to test tokens that are secret, so there is no API response that will give them back
return "", fmt.Errorf("cannot choose from multiple tar files in image collection: %v", tarfiles)
}
imageID := tarfiles[0][:len(tarfiles[0])-4]
- imageFile := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + tarfiles[0]
+ imageTarballPath := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + imageID + ".tar"
runner.CrunchLog.Printf("Using Docker image id %q", imageID)
- if !runner.executor.ImageLoaded(imageID) {
- runner.CrunchLog.Print("Loading Docker image from keep")
- err = runner.executor.LoadImage(imageFile)
- if err != nil {
- return "", err
- }
- } else {
- runner.CrunchLog.Print("Docker image is available")
+ runner.CrunchLog.Print("Loading Docker image from keep")
+ err = runner.executor.LoadImage(imageID, imageTarballPath, runner.Container, runner.ArvMountPoint,
+ runner.containerClient)
+ if err != nil {
+ return "", err
}
+
return imageID, nil
}
} else {
arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
}
+ arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_uuid")
arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token)
}
}
}
+ runner.ArvMount = nil
}
if runner.ArvMountPoint != "" {
if rmerr := os.Remove(runner.ArvMountPoint); rmerr != nil {
runner.CrunchLog.Printf("While cleaning up arv-mount directory %s: %v", runner.ArvMountPoint, rmerr)
}
+ runner.ArvMountPoint = ""
}
if rmerr := os.RemoveAll(runner.parentTemp); rmerr != nil {
}
checkErr("stopHoststat", runner.stopHoststat())
checkErr("CommitLogs", runner.CommitLogs())
+ runner.CleanupDirs()
checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
}()
exit chan int
}
-func (e *stubExecutor) ImageLoaded(imageID string) bool { return e.imageLoaded }
-func (e *stubExecutor) LoadImage(filename string) error { e.loaded = filename; return e.loadErr }
+func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
+ containerClient *arvados.Client) error {
+ e.loaded = tarball
+ return e.loadErr
+}
func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
func (e *stubExecutor) Start() error { e.exit = make(chan int, 1); go e.runFunc(); return e.startErr }
func (e *stubExecutor) CgroupID() string { return "cgroupid" }
imageID, err = s.runner.LoadImage()
c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
c.Check(s.executor.loaded, Equals, "")
-
- // if executor reports image is already loaded, LoadImage should not be called
- s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
- s.executor.imageLoaded = true
- s.executor.loaded = ""
- s.executor.loadErr = nil
- imageID, err = s.runner.LoadImage()
- c.Check(err, IsNil)
- c.Check(s.executor.loaded, Equals, "")
- c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
}
type ArvErrorTestClient struct{}
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
"/keepout": {realTemp + "/keep1/tmp0", false},
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
"/keepout": {realTemp + "/keep1/tmp0", false},
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/tmp": {realTemp + "/tmp2", false},
"/tmp/foo": {realTemp + "/keep1/tmp0", true},
"strings"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerclient "github.com/docker/docker/client"
}, err
}
-func (e *dockerExecutor) ImageLoaded(imageID string) bool {
+func (e *dockerExecutor) LoadImage(imageID string, imageTarballPath string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) error {
_, _, err := e.dockerclient.ImageInspectWithRaw(context.TODO(), imageID)
- return err == nil
-}
+ if err == nil {
+ // already loaded
+ return nil
+ }
-func (e *dockerExecutor) LoadImage(filename string) error {
- f, err := os.Open(filename)
+ f, err := os.Open(imageTarballPath)
if err != nil {
return err
}
import (
"io"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
)
// containerExecutor is an interface to a container runtime
// (docker/singularity).
type containerExecutor interface {
- // ImageLoaded determines whether the given image is already
- // available to use without calling ImageLoad.
- ImageLoaded(imageID string) bool
-
// ImageLoad loads the image from the given tarball such that
// it can be used to create/start a container.
- LoadImage(filename string) error
+ LoadImage(imageID string, imageTarballPath string, container arvados.Container, keepMount string,
+ containerClient *arvados.Client) error
// Wait for the container process to finish, and return its
// exit code. If applicable, also remove the stopped container
"strings"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
. "gopkg.in/check.v1"
)
Stdout: nopWriteCloser{&s.stdout},
Stderr: nopWriteCloser{&s.stderr},
}
- err := s.executor.LoadImage(busyboxDockerImage(c))
+ err := s.executor.LoadImage("", busyboxDockerImage(c), arvados.Container{}, "", nil)
c.Assert(err, IsNil)
}
package crunchrun
import (
+ "fmt"
"io/ioutil"
"os"
"os/exec"
"sort"
"syscall"
+ "time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
)
}, nil
}
-func (e *singularityExecutor) ImageLoaded(string) bool {
- return false
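+// getOrCreateProject returns the project (group) with the given name
+// owned by ownerUuid, creating it if it does not already exist.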
+func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, containerClient *arvados.Client) (*arvados.Group, error) {
+ var gp arvados.GroupList
+ err := containerClient.RequestAndDecode(&gp,
+ arvados.EndpointGroupList.Method,
+ arvados.EndpointGroupList.Path,
+ nil, arvados.ListOptions{Filters: []arvados.Filter{
+ arvados.Filter{"owner_uuid", "=", ownerUuid},
+ arvados.Filter{"name", "=", name},
+ arvados.Filter{"group_class", "=", "project"},
+ },
+ Limit: 1})
+ if err != nil {
+ return nil, err
+ }
+ if len(gp.Items) == 1 {
+ return &gp.Items[0], nil
+ }
+
+ var rgroup arvados.Group
+ err = containerClient.RequestAndDecode(&rgroup,
+ arvados.EndpointGroupCreate.Method,
+ arvados.EndpointGroupCreate.Path,
+ nil, map[string]interface{}{
+ "group": map[string]string{
+ "owner_uuid": ownerUuid,
+ "name": name,
+ "group_class": "project",
+ },
+ })
+ if err != nil {
+ return nil, err
+ }
+ return &rgroup, nil
+}
+
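+// checkImageCache returns the collection that holds (or, once built,
+// will hold) the converted SIF image for dockerImageID. The collection
+// lives in an "auto-generated singularity images" project under the
+// container user's ".cache" project; the projects and collection are
+// created if they do not already exist, and a newly created collection
+// is given a trash_at time two weeks in the future.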
+func (e *singularityExecutor) checkImageCache(dockerImageID string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) (collection *arvados.Collection, err error) {
+
+ // Cache the image to keep
+ cacheGroup, err := e.getOrCreateProject(container.RuntimeUserUUID, ".cache", containerClient)
+ if err != nil {
+ return nil, fmt.Errorf("error getting '.cache' project: %v", err)
+ }
+ imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", containerClient)
+ if err != nil {
+ return nil, fmt.Errorf("error getting 'auto-generated singularity images' project: %s", err)
+ }
+
+ collectionName := fmt.Sprintf("singularity image for %v", dockerImageID)
+ var cl arvados.CollectionList
+ err = containerClient.RequestAndDecode(&cl,
+ arvados.EndpointCollectionList.Method,
+ arvados.EndpointCollectionList.Path,
+ nil, arvados.ListOptions{Filters: []arvados.Filter{
+ arvados.Filter{"owner_uuid", "=", imageGroup.UUID},
+ arvados.Filter{"name", "=", collectionName},
+ },
+ Limit: 1})
+ if err != nil {
+ return nil, fmt.Errorf("error querying for collection '%v': %v", collectionName, err)
+ }
+ var imageCollection arvados.Collection
+ if len(cl.Items) == 1 {
+ imageCollection = cl.Items[0]
+ } else {
+ collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339)
+ exp := time.Now().Add(24 * 7 * 2 * time.Hour)
+ err = containerClient.RequestAndDecode(&imageCollection,
+ arvados.EndpointCollectionCreate.Method,
+ arvados.EndpointCollectionCreate.Path,
+ nil, map[string]interface{}{
+ "collection": map[string]string{
+ "owner_uuid": imageGroup.UUID,
+ "name": collectionName,
+ "trash_at": exp.UTC().Format(time.RFC3339),
+ },
+ })
+ if err != nil {
+ return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err)
+ }
+
+ }
+
+ return &imageCollection, nil
}
// LoadImage will satisfy ContainerExecuter interface transforming
// containerImage into a sif file for later use.
-func (e *singularityExecutor) LoadImage(imageTarballPath string) error {
- e.logf("building singularity image")
- // "singularity build" does not accept a
- // docker-archive://... filename containing a ":" character,
- // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a
- // symlink that doesn't have ":" chars.
- err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar")
+func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) error {
+
+ var imageFilename string
+ var sifCollection *arvados.Collection
+ var err error
+ if containerClient != nil {
+ sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient)
+ if err != nil {
+ return err
+ }
+ imageFilename = fmt.Sprintf("%s/by_uuid/%s/image.sif", arvMountPoint, sifCollection.UUID)
+ } else {
+ imageFilename = e.tmpdir + "/image.sif"
+ }
+
+ if _, err := os.Stat(imageFilename); os.IsNotExist(err) {
+ e.logf("building singularity image")
+ // "singularity build" does not accept a
+ // docker-archive://... filename containing a ":" character,
+ // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a
+ // symlink that doesn't have ":" chars.
+ err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar")
+ if err != nil {
+ return err
+ }
+
+ build := exec.Command("singularity", "build", imageFilename, "docker-archive://"+e.tmpdir+"/image.tar")
+ e.logf("%v", build.Args)
+ out, err := build.CombinedOutput()
+ // INFO: Starting build...
+ // Getting image source signatures
+ // Copying blob ab15617702de done
+ // Copying config 651e02b8a2 done
+ // Writing manifest to image destination
+ // Storing signatures
+ // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3
+ // INFO: Creating SIF file...
+ // INFO: Build complete: arvados-jobs.latest.sif
+ e.logf("%s", out)
+ if err != nil {
+ return err
+ }
+ }
+
+ if containerClient == nil {
+ e.imageFilename = imageFilename
+ return nil
+ }
+
+ // update TTL to now + two weeks
+ exp := time.Now().Add(24 * 7 * 2 * time.Hour)
+
+ uuidPath, err := containerClient.PathForUUID("update", sifCollection.UUID)
if err != nil {
- return err
+ e.logf("error PathForUUID: %v", err)
+ return nil
+ }
+ var imageCollection arvados.Collection
+ err = containerClient.RequestAndDecode(&imageCollection,
+ arvados.EndpointCollectionUpdate.Method,
+ uuidPath,
+ nil, map[string]interface{}{
+ "collection": map[string]string{
+ "name": fmt.Sprintf("singularity image for %v", dockerImageID),
+ "trash_at": exp.UTC().Format(time.RFC3339),
+ },
+ })
+ if err == nil {
+ // If we just wrote the image to the cache, the
+ // response also returns the updated PDH
+ e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, imageCollection.PortableDataHash)
+ return nil
}
- e.imageFilename = e.tmpdir + "/image.sif"
- build := exec.Command("singularity", "build", e.imageFilename, "docker-archive://"+e.tmpdir+"/image.tar")
- e.logf("%v", build.Args)
- out, err := build.CombinedOutput()
- // INFO: Starting build...
- // Getting image source signatures
- // Copying blob ab15617702de done
- // Copying config 651e02b8a2 done
- // Writing manifest to image destination
- // Storing signatures
- // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3
- // INFO: Creating SIF file...
- // INFO: Build complete: arvados-jobs.latest.sif
- e.logf("%s", out)
+
+ e.logf("error updating/renaming collection for cached sif image: %v", err)
+ // Failed to update but maybe it lost a race and there is
+ // another cached collection in the same place, so check the cache
+ // again
+ sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient)
if err != nil {
return err
}
+ e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, sifCollection.PortableDataHash)
+
return nil
}
mount := e.spec.BindMounts[path]
args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly])
}
- args = append(args, e.imageFilename)
- args = append(args, e.spec.Command...)
// This is for singularity 3.5.2. There are some behaviors
// that will change in singularity 3.6, please see:
// https://sylabs.io/guides/3.5/user-guide/environment_and_metadata.html
env := make([]string, 0, len(e.spec.Env))
for k, v := range e.spec.Env {
- env = append(env, "SINGULARITYENV_"+k+"="+v)
+ if k == "HOME" {
+ // $HOME is a special case
+ args = append(args, "--home="+v)
+ } else {
+ env = append(env, "SINGULARITYENV_"+k+"="+v)
+ }
}
+ args = append(args, e.imageFilename)
+ args = append(args, e.spec.Command...)
+
path, err := exec.LookPath(args[0])
if err != nil {
return err
{"mkdir", "-p", "log", "tmp", ".bundle", "/var/www/.gem", "/var/www/.bundle", "/var/www/.passenger"},
{"touch", "log/production.log"},
{"chown", "-R", "--from=root", "www-data:www-data", "/var/www/.gem", "/var/www/.bundle", "/var/www/.passenger", "log", "tmp", ".bundle", "Gemfile.lock", "config.ru", "config/environment.rb"},
- {"sudo", "-u", "www-data", "/var/lib/arvados/bin/gem", "install", "--user", "--conservative", "--no-document", "bundler:1.16.6", "bundler:1.17.3", "bundler:2.0.2"},
+ {"sudo", "-u", "www-data", "/var/lib/arvados/bin/gem", "install", "--user", "--conservative", "--no-document", "bundler:2.2.19"},
{"sudo", "-u", "www-data", "/var/lib/arvados/bin/bundle", "install", "--deployment", "--jobs", "8", "--path", "/var/www/.gem"},
{"sudo", "-u", "www-data", "/var/lib/arvados/bin/bundle", "exec", "passenger-config", "build-native-support"},
{"sudo", "-u", "www-data", "/var/lib/arvados/bin/bundle", "exec", "passenger-config", "install-standalone-runtime"},
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "math"
+ "net/http"
+ "regexp"
+ "strings"
+ "sync"
+ "time"
+
+ "git.arvados.org/arvados.git/lib/cmd"
+ "git.arvados.org/arvados.git/lib/dispatchcloud"
+ "git.arvados.org/arvados.git/lib/service"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/auth"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "git.arvados.org/arvados.git/sdk/go/dispatch"
+ "git.arvados.org/arvados.git/sdk/go/health"
+ "github.com/julienschmidt/httprouter"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+ "github.com/sirupsen/logrus"
+)
+
+var DispatchCommand cmd.Handler = service.Command(arvados.ServiceNameDispatchLSF, newHandler)
+
+func newHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
+ ac, err := arvados.NewClientFromConfig(cluster)
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
+ }
+ d := &dispatcher{
+ Cluster: cluster,
+ Context: ctx,
+ ArvClient: ac,
+ AuthToken: token,
+ Registry: reg,
+ }
+ go d.Start()
+ return d
+}
+
+type dispatcher struct {
+ Cluster *arvados.Cluster
+ Context context.Context
+ ArvClient *arvados.Client
+ AuthToken string
+ Registry *prometheus.Registry
+
+ logger logrus.FieldLogger
+ lsfcli lsfcli
+ lsfqueue lsfqueue
+ arvDispatcher *dispatch.Dispatcher
+ httpHandler http.Handler
+
+ initOnce sync.Once
+ stop chan struct{}
+ stopped chan struct{}
+}
+
+// Start starts the dispatcher. Start can be called multiple times
+// with no ill effect.
+func (disp *dispatcher) Start() {
+ disp.initOnce.Do(func() {
+ disp.init()
+ go func() {
+ disp.checkLsfQueueForOrphans()
+ err := disp.arvDispatcher.Run(disp.Context)
+ if err != nil {
+ disp.logger.Error(err)
+ disp.Close()
+ }
+ }()
+ })
+}
+
+// ServeHTTP implements service.Handler.
+func (disp *dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ disp.Start()
+ disp.httpHandler.ServeHTTP(w, r)
+}
+
+// CheckHealth implements service.Handler.
+func (disp *dispatcher) CheckHealth() error {
+ disp.Start()
+ select {
+ case <-disp.stopped:
+ return errors.New("stopped")
+ default:
+ return nil
+ }
+}
+
+// Done implements service.Handler.
+func (disp *dispatcher) Done() <-chan struct{} {
+ return disp.stopped
+}
+
+// Stop dispatching containers and release resources. Used by tests.
+func (disp *dispatcher) Close() {
+ disp.Start()
+ select {
+ case disp.stop <- struct{}{}:
+ default:
+ }
+ <-disp.stopped
+}
+
+func (disp *dispatcher) init() {
+ disp.logger = ctxlog.FromContext(disp.Context)
+ disp.lsfcli.logger = disp.logger
+ disp.lsfqueue = lsfqueue{
+ logger: disp.logger,
+ period: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval),
+ lsfcli: &disp.lsfcli,
+ }
+ disp.ArvClient.AuthToken = disp.AuthToken
+ disp.stop = make(chan struct{}, 1)
+ disp.stopped = make(chan struct{})
+
+ arv, err := arvadosclient.New(disp.ArvClient)
+ if err != nil {
+ disp.logger.Fatalf("Error making Arvados client: %v", err)
+ }
+ arv.Retries = 25
+ arv.ApiToken = disp.AuthToken
+ disp.arvDispatcher = &dispatch.Dispatcher{
+ Arv: arv,
+ Logger: disp.logger,
+ BatchSize: disp.Cluster.API.MaxItemsPerResponse,
+ RunContainer: disp.runContainer,
+ PollPeriod: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval),
+ MinRetryPeriod: time.Duration(disp.Cluster.Containers.MinRetryPeriod),
+ }
+
+ if disp.Cluster.ManagementToken == "" {
+ disp.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ http.Error(w, "Management API authentication is not configured", http.StatusForbidden)
+ })
+ } else {
+ mux := httprouter.New()
+ metricsH := promhttp.HandlerFor(disp.Registry, promhttp.HandlerOpts{
+ ErrorLog: disp.logger,
+ })
+ mux.Handler("GET", "/metrics", metricsH)
+ mux.Handler("GET", "/metrics.json", metricsH)
+ mux.Handler("GET", "/_health/:check", &health.Handler{
+ Token: disp.Cluster.ManagementToken,
+ Prefix: "/_health/",
+ Routes: health.Routes{"ping": disp.CheckHealth},
+ })
+ disp.httpHandler = auth.RequireLiteralToken(disp.Cluster.ManagementToken, mux)
+ }
+}
+
+func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
+ ctx, cancel := context.WithCancel(disp.Context)
+ defer cancel()
+
+ if ctr.State != dispatch.Locked {
+ // already started by prior invocation
+ } else if _, ok := disp.lsfqueue.JobID(ctr.UUID); !ok {
+ disp.logger.Printf("Submitting container %s to LSF", ctr.UUID)
+ cmd := []string{disp.Cluster.Containers.CrunchRunCommand}
+ cmd = append(cmd, "--runtime-engine="+disp.Cluster.Containers.RuntimeEngine)
+ cmd = append(cmd, disp.Cluster.Containers.CrunchRunArgumentsList...)
+ err := disp.submit(ctr, cmd)
+ if err != nil {
+ return err
+ }
+ }
+
+ disp.logger.Printf("Start monitoring container %v in state %q", ctr.UUID, ctr.State)
+ defer disp.logger.Printf("Done monitoring container %s", ctr.UUID)
+
+ // If the container disappears from the lsf queue, there is
+ // no point in waiting for further dispatch updates: just
+ // clean up and return.
+ go func(uuid string) {
+ for ctx.Err() == nil {
+ if _, ok := disp.lsfqueue.JobID(uuid); !ok {
+ disp.logger.Printf("container %s job disappeared from LSF queue", uuid)
+ cancel()
+ return
+ }
+ }
+ }(ctr.UUID)
+
+ for done := false; !done; {
+ select {
+ case <-ctx.Done():
+ // Disappeared from lsf queue
+ if err := disp.arvDispatcher.Arv.Get("containers", ctr.UUID, nil, &ctr); err != nil {
+ disp.logger.Printf("error getting final container state for %s: %s", ctr.UUID, err)
+ }
+ switch ctr.State {
+ case dispatch.Running:
+ disp.arvDispatcher.UpdateState(ctr.UUID, dispatch.Cancelled)
+ case dispatch.Locked:
+ disp.arvDispatcher.Unlock(ctr.UUID)
+ }
+ return nil
+ case updated, ok := <-status:
+ if !ok {
+ // status channel is closed, which is
+ // how arvDispatcher tells us to stop
+ // touching the container record, kill
+ // off any remaining LSF processes,
+ // etc.
+ done = true
+ break
+ }
+ if updated.State != ctr.State {
+ disp.logger.Infof("container %s changed state from %s to %s", ctr.UUID, ctr.State, updated.State)
+ }
+ ctr = updated
+ if ctr.Priority < 1 {
+ disp.logger.Printf("container %s has state %s, priority %d: cancel lsf job", ctr.UUID, ctr.State, ctr.Priority)
+ disp.bkill(ctr)
+ } else {
+ disp.lsfqueue.SetPriority(ctr.UUID, int64(ctr.Priority))
+ }
+ }
+ }
+ disp.logger.Printf("container %s is done", ctr.UUID)
+
+ // Try "bkill" every few seconds until the LSF job disappears
+ // from the queue.
+ ticker := time.NewTicker(5 * time.Second)
+ defer ticker.Stop()
+ for jobid, ok := disp.lsfqueue.JobID(ctr.UUID); ok; _, ok = disp.lsfqueue.JobID(ctr.UUID) {
+ err := disp.lsfcli.Bkill(jobid)
+ if err != nil {
+ disp.logger.Warnf("%s: bkill(%d): %s", ctr.UUID, jobid, err)
+ }
+ <-ticker.C
+ }
+ return nil
+}
+
+func (disp *dispatcher) submit(container arvados.Container, crunchRunCommand []string) error {
+ // Start with an empty slice here to ensure append() doesn't
+ // modify crunchRunCommand's underlying array
+ var crArgs []string
+ crArgs = append(crArgs, crunchRunCommand...)
+ crArgs = append(crArgs, container.UUID)
+ crScript := execScript(crArgs)
+
+ bsubArgs, err := disp.bsubArgs(container)
+ if err != nil {
+ return err
+ }
+ return disp.lsfcli.Bsub(crScript, bsubArgs, disp.ArvClient)
+}
+
+func (disp *dispatcher) bkill(ctr arvados.Container) {
+ if jobid, ok := disp.lsfqueue.JobID(ctr.UUID); !ok {
+ disp.logger.Debugf("bkill(%s): redundant, job not in queue", ctr.UUID)
+ } else if err := disp.lsfcli.Bkill(jobid); err != nil {
+ disp.logger.Warnf("%s: bkill(%d): %s", ctr.UUID, jobid, err)
+ }
+}
+
+func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error) {
+ args := []string{"bsub"}
+ args = append(args, disp.Cluster.Containers.LSF.BsubArgumentsList...)
+ args = append(args, "-J", container.UUID)
+ args = append(args, disp.bsubConstraintArgs(container)...)
+ if u := disp.Cluster.Containers.LSF.BsubSudoUser; u != "" {
+ args = append([]string{"sudo", "-E", "-u", u}, args...)
+ }
+ return args, nil
+}
+
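+// bsubConstraintArgs returns bsub resource arguments derived from the
+// container's runtime constraints, e.g.
+// -R "rusage[mem=11701MB:tmp=45777MB] affinity[core(4)]".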
+func (disp *dispatcher) bsubConstraintArgs(container arvados.Container) []string {
+ // TODO: propagate container.SchedulingParameters.Partitions
+ tmp := int64(math.Ceil(float64(dispatchcloud.EstimateScratchSpace(&container)) / 1048576))
+ vcpus := container.RuntimeConstraints.VCPUs
+ mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+
+ container.RuntimeConstraints.KeepCacheRAM+
+ int64(disp.Cluster.Containers.ReserveExtraRAM)) / 1048576))
+ return []string{
+ "-R", fmt.Sprintf("rusage[mem=%dMB:tmp=%dMB] affinity[core(%d)]", mem, tmp, vcpus),
+ }
+}
+
+// Check the next bjobs report, and invoke TrackContainer for all the
+// containers in the report. This gives us a chance to cancel existing
+// Arvados LSF jobs (started by a previous dispatch process) that
+// never released their LSF job allocations even though their
+// container states are Cancelled or Complete. See
+// https://dev.arvados.org/issues/10979
+func (disp *dispatcher) checkLsfQueueForOrphans() {
+ containerUuidPattern := regexp.MustCompile(`^[a-z0-9]{5}-dz642-[a-z0-9]{15}$`)
+ for _, uuid := range disp.lsfqueue.All() {
+ if !containerUuidPattern.MatchString(uuid) || !strings.HasPrefix(uuid, disp.Cluster.ClusterID) {
+ continue
+ }
+ err := disp.arvDispatcher.TrackContainer(uuid)
+ if err != nil {
+ disp.logger.Warnf("checkLsfQueueForOrphans: TrackContainer(%s): %s", uuid, err)
+ }
+ }
+}
+
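+// execScript returns a shell script that execs the given command,
+// single-quoting each argument so it is passed through unmodified,
+// e.g. ["crunch-run", "--foo=bar baz", "uuid"] becomes
+// "#!/bin/sh\nexec 'crunch-run' '--foo=bar baz' 'uuid'\n".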
+func execScript(args []string) []byte {
+ s := "#!/bin/sh\nexec"
+ for _, w := range args {
+ s += ` '`
+ s += strings.Replace(w, `'`, `'\''`, -1)
+ s += `'`
+ }
+ return []byte(s + "\n")
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "context"
+ "fmt"
+ "math/rand"
+ "os/exec"
+ "strconv"
+ "sync"
+ "testing"
+ "time"
+
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/prometheus/client_golang/prometheus"
+ "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) {
+ check.TestingT(t)
+}
+
+var _ = check.Suite(&suite{})
+
+type suite struct {
+ disp *dispatcher
+}
+
+func (s *suite) TearDownTest(c *check.C) {
+ arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
+}
+
+func (s *suite) SetUpTest(c *check.C) {
+ cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+ c.Assert(err, check.IsNil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second)
+ s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
+ s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
+ return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
+ }
+}
+
+type lsfstub struct {
+ sudoUser string
+ errorRate float64
+}
+
+func (stub lsfstub) stubCommand(c *check.C) func(prog string, args ...string) *exec.Cmd {
+ mtx := sync.Mutex{}
+ nextjobid := 100
+ fakejobq := map[int]string{}
+ return func(prog string, args ...string) *exec.Cmd {
+ c.Logf("stubCommand: %q %q", prog, args)
+ if rand.Float64() < stub.errorRate {
+ return exec.Command("bash", "-c", "echo >&2 'stub random failure' && false")
+ }
+ if stub.sudoUser != "" && len(args) > 3 &&
+ prog == "sudo" &&
+ args[0] == "-E" &&
+ args[1] == "-u" &&
+ args[2] == stub.sudoUser {
+ prog, args = args[3], args[4:]
+ }
+ switch prog {
+ case "bsub":
+ c.Assert(args, check.HasLen, 4)
+ c.Check(args[0], check.Equals, "-J")
+ switch args[1] {
+ case arvadostest.LockedContainerUUID:
+ c.Check(args, check.DeepEquals, []string{"-J", arvadostest.LockedContainerUUID, "-R", "rusage[mem=11701MB:tmp=0MB] affinity[core(4)]"})
+ mtx.Lock()
+ fakejobq[nextjobid] = args[1]
+ nextjobid++
+ mtx.Unlock()
+ case arvadostest.QueuedContainerUUID:
+ c.Check(args, check.DeepEquals, []string{"-J", arvadostest.QueuedContainerUUID, "-R", "rusage[mem=11701MB:tmp=45777MB] affinity[core(4)]"})
+ mtx.Lock()
+ fakejobq[nextjobid] = args[1]
+ nextjobid++
+ mtx.Unlock()
+ default:
+ c.Errorf("unexpected uuid passed to bsub: args %q", args)
+ return exec.Command("false")
+ }
+ return exec.Command("echo", "submitted job")
+ case "bjobs":
+ c.Check(args, check.DeepEquals, []string{"-u", "all", "-noheader", "-o", "jobid stat job_name:30"})
+ out := ""
+ for jobid, uuid := range fakejobq {
+ out += fmt.Sprintf(`%d %s %s\n`, jobid, "RUN", uuid)
+ }
+ c.Logf("bjobs out: %q", out)
+ return exec.Command("printf", out)
+ case "bkill":
+ killid, _ := strconv.Atoi(args[0])
+ if uuid, ok := fakejobq[killid]; !ok {
+ return exec.Command("bash", "-c", fmt.Sprintf("printf >&2 'Job <%d>: No matching job found\n'", killid))
+ } else if uuid == "" {
+ return exec.Command("bash", "-c", fmt.Sprintf("printf >&2 'Job <%d>: Job has already finished\n'", killid))
+ } else {
+ go func() {
+ time.Sleep(time.Millisecond)
+ mtx.Lock()
+ delete(fakejobq, killid)
+ mtx.Unlock()
+ }()
+ return exec.Command("bash", "-c", fmt.Sprintf("printf 'Job <%d> is being terminated\n'", killid))
+ }
+ default:
+ return exec.Command("bash", "-c", fmt.Sprintf("echo >&2 'stub: command not found: %+q'", prog))
+ }
+ }
+}
+
+func (s *suite) TestSubmit(c *check.C) {
+ s.disp.lsfcli.stubCommand = lsfstub{
+ errorRate: 0.1,
+ sudoUser: s.disp.Cluster.Containers.LSF.BsubSudoUser,
+ }.stubCommand(c)
+ s.disp.Start()
+ deadline := time.Now().Add(20 * time.Second)
+ for range time.NewTicker(time.Second).C {
+ if time.Now().After(deadline) {
+ c.Error("timed out")
+ break
+ }
+ // "queuedcontainer" should be running
+ if _, ok := s.disp.lsfqueue.JobID(arvadostest.QueuedContainerUUID); !ok {
+ continue
+ }
+ // "lockedcontainer" should be cancelled because it
+ // has priority 0 (no matching container requests)
+ if _, ok := s.disp.lsfqueue.JobID(arvadostest.LockedContainerUUID); ok {
+ continue
+ }
+ var ctr arvados.Container
+ if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
+ c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
+ continue
+ }
+ if ctr.State != arvados.ContainerStateQueued {
+ c.Logf("LockedContainer is not in the LSF queue but its arvados record has not been updated to state==Queued (state is %q)", ctr.State)
+ continue
+ }
+ c.Log("reached desired state")
+ break
+ }
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "os/exec"
+ "strings"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/sirupsen/logrus"
+)
+
+type bjobsEntry struct {
+ id int
+ name string
+ stat string
+}
+
+type lsfcli struct {
+ logger logrus.FieldLogger
+ // (for testing) if non-nil, call stubCommand() instead of
+ // exec.Command() when running lsf command line programs.
+ stubCommand func(string, ...string) *exec.Cmd
+}
+
+func (cli lsfcli) command(prog string, args ...string) *exec.Cmd {
+ if f := cli.stubCommand; f != nil {
+ return f(prog, args...)
+ } else {
+ return exec.Command(prog, args...)
+ }
+}
+
+func (cli lsfcli) Bsub(script []byte, args []string, arv *arvados.Client) error {
+ cli.logger.Infof("bsub command %q script %q", args, script)
+ cmd := cli.command(args[0], args[1:]...)
+ cmd.Env = append([]string(nil), os.Environ()...)
+ cmd.Env = append(cmd.Env, "ARVADOS_API_HOST="+arv.APIHost)
+ cmd.Env = append(cmd.Env, "ARVADOS_API_TOKEN="+arv.AuthToken)
+ if arv.Insecure {
+ cmd.Env = append(cmd.Env, "ARVADOS_API_HOST_INSECURE=1")
+ }
+ cmd.Stdin = bytes.NewReader(script)
+ out, err := cmd.Output()
+ cli.logger.WithField("stdout", string(out)).Infof("bsub finished")
+ return errWithStderr(err)
+}
+
+func (cli lsfcli) Bjobs() ([]bjobsEntry, error) {
+ cli.logger.Debugf("Bjobs()")
+ cmd := cli.command("bjobs", "-u", "all", "-noheader", "-o", "jobid stat job_name:30")
+ buf, err := cmd.Output()
+ if err != nil {
+ return nil, errWithStderr(err)
+ }
+ var bjobs []bjobsEntry
+ for _, line := range strings.Split(string(buf), "\n") {
+ if line == "" {
+ continue
+ }
+ var ent bjobsEntry
+ if _, err := fmt.Sscan(line, &ent.id, &ent.stat, &ent.name); err != nil {
+ cli.logger.Warnf("ignoring unparsed line in bjobs output: %q", line)
+ continue
+ }
+ bjobs = append(bjobs, ent)
+ }
+ return bjobs, nil
+}
+
+func (cli lsfcli) Bkill(id int) error {
+ cli.logger.Infof("Bkill(%d)", id)
+ cmd := cli.command("bkill", fmt.Sprintf("%d", id))
+ buf, err := cmd.CombinedOutput()
+ if err == nil || strings.Index(string(buf), "already finished") >= 0 {
+ return nil
+ } else {
+ return fmt.Errorf("%s (%q)", err, buf)
+ }
+}
+
+func errWithStderr(err error) error {
+ if err, ok := err.(*exec.ExitError); ok {
+ return fmt.Errorf("%s (%q)", err, err.Stderr)
+ }
+ return err
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "sync"
+ "time"
+
+ "github.com/sirupsen/logrus"
+)
+
+type lsfqueue struct {
+ logger logrus.FieldLogger
+ period time.Duration
+ lsfcli *lsfcli
+
+ initOnce sync.Once
+ mutex sync.Mutex
+ nextReady chan (<-chan struct{})
+ updated *sync.Cond
+ latest map[string]bjobsEntry
+}
+
+// JobID waits for the next queue update (so even a job that was only
+// submitted a nanosecond ago will show up) and then returns the LSF
+// job ID corresponding to the given container UUID.
+func (q *lsfqueue) JobID(uuid string) (int, bool) {
+ ent, ok := q.getNext()[uuid]
+ return ent.id, ok
+}
+
+// All waits for the next queue update, then returns the names of all
+// jobs in the queue. Used by checkLsfQueueForOrphans().
+func (q *lsfqueue) All() []string {
+ latest := q.getNext()
+ names := make([]string, 0, len(latest))
+ for name := range latest {
+ names = append(names, name)
+ }
+ return names
+}
+
+func (q *lsfqueue) SetPriority(uuid string, priority int64) {
+ q.initOnce.Do(q.init)
+ q.logger.Debug("SetPriority is not implemented")
+}
+
+func (q *lsfqueue) getNext() map[string]bjobsEntry {
+ q.initOnce.Do(q.init)
+ <-(<-q.nextReady)
+ q.mutex.Lock()
+ defer q.mutex.Unlock()
+ return q.latest
+}
+
+func (q *lsfqueue) init() {
+ q.updated = sync.NewCond(&q.mutex)
+ q.nextReady = make(chan (<-chan struct{}))
+ ticker := time.NewTicker(time.Second)
+ go func() {
+ for range ticker.C {
+ // Send a new "next update ready" channel to
+ // the next goroutine that wants one (and any
+ // others that have already queued up since
+ // the first one started waiting).
+ //
+ // Below, when we get a new update, we'll
+ // signal that to the other goroutines by
+ // closing the ready chan.
+ ready := make(chan struct{})
+ q.nextReady <- ready
+ for {
+ select {
+ case q.nextReady <- ready:
+ continue
+ default:
+ }
+ break
+ }
+ // Run bjobs repeatedly if needed, until we
+ // get valid output.
+ var ents []bjobsEntry
+ for {
+ q.logger.Debug("running bjobs")
+ var err error
+ ents, err = q.lsfcli.Bjobs()
+ if err == nil {
+ break
+ }
+ q.logger.Warnf("bjobs: %s", err)
+ <-ticker.C
+ }
+ next := make(map[string]bjobsEntry, len(ents))
+ for _, ent := range ents {
+ next[ent.name] = ent
+ }
+ // Replace q.latest and notify all the
+ // goroutines that the "next update" they
+ // asked for is now ready.
+ q.mutex.Lock()
+ q.latest = next
+ q.mutex.Unlock()
+ close(ready)
+ }
+ }()
+}
"io"
"net"
"net/http"
+ _ "net/http/pprof"
"net/url"
"os"
"strings"
loader := config.NewLoader(stdin, log)
loader.SetupFlags(flags)
versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0")
+ pprofAddr := flags.String("pprof", "", "Serve Go profile data at `[addr]:port`")
err = flags.Parse(args)
if err == flag.ErrHelp {
err = nil
return cmd.Version.RunCommand(prog, args, stdin, stdout, stderr)
}
+ if *pprofAddr != "" {
+ go func() {
+ log.Println(http.ListenAndServe(*pprofAddr, nil))
+ }()
+ }
+
if strings.HasSuffix(prog, "controller") {
// Some config-loader checks try to make API calls via
// controller. Those can't be expected to work if this
import cwltool.workflow
import cwltool.process
import cwltool.argparser
+from cwltool.errors import WorkflowException
from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing
help="Enable loading and running development versions "
"of the CWL standards.", default=False)
parser.add_argument('--storage-classes', default="default",
- help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
+ help="Specify comma separated list of storage classes to be used when saving final workflow output to Keep.")
+ parser.add_argument('--intermediate-storage-classes', default="default",
+ help="Specify comma separated list of storage classes to be used when saving intermediate workflow output to Keep.")
parser.add_argument("--intermediate-output-ttl", type=int, metavar="N",
help="If N > 0, intermediate output collections will be trashed N seconds after creation. Default is 0 (don't trash).",
"http://commonwl.org/cwltool#LoadListingRequirement",
"http://arvados.org/cwl#IntermediateOutput",
"http://arvados.org/cwl#ReuseRequirement",
- "http://arvados.org/cwl#ClusterTarget"
+ "http://arvados.org/cwl#ClusterTarget",
+ "http://arvados.org/cwl#OutputStorageClass"
])
def exit_signal_handler(sigcode, frame):
job_order_object = None
arvargs = parser.parse_args(args)
- if len(arvargs.storage_classes.strip().split(',')) > 1:
- logger.error(str(u"Multiple storage classes are not supported currently."))
- return 1
-
arvargs.use_container = True
arvargs.relax_path_checks = True
arvargs.print_supported_versions = False
if keep_client is None:
keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
+ except WorkflowException as e:
+ logger.error(e, exc_info=(sys.exc_info()[1] if arvargs.debug else False))
+ return 1
except Exception:
logger.exception("Error creating the Arvados CWL Executor")
return 1
project_uuid:
type: string?
doc: The project that will own the container requests and intermediate collections
+
+
+- name: OutputStorageClass
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify the storage class to be used for intermediate and final output
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:StorageClassHint"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ intermediateStorageClass:
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: One or more storage classes
+ finalStorageClass:
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: One or more storage classes
project_uuid:
type: string?
doc: The project that will own the container requests and intermediate collections
+
+- name: OutputStorageClass
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify the storage class to be used for intermediate and final output
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:StorageClassHint"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ intermediateStorageClass:
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: One or more storage classes
+ finalStorageClass:
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: One or more storage classes
project_uuid:
type: string?
doc: The project that will own the container requests and intermediate collections
+
+
+- name: OutputStorageClass
+ type: record
+ extends: cwl:ProcessRequirement
+ inVocab: false
+ doc: |
+ Specify the storage class to be used for intermediate and final output
+ fields:
+ class:
+ type: string
+ doc: "Always 'arv:StorageClassHint"
+ jsonldPredicate:
+ _id: "@type"
+ _type: "@vocab"
+ intermediateStorageClass:
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: One or more storage classes
+ finalStorageClass:
+ type:
+ - "null"
+ - string
+ - type: array
+ items: string
+ doc: One or more storage classes
if self.output_ttl < 0:
raise WorkflowException("Invalid value %d for output_ttl, cannot be less than zero" % container_request["output_ttl"])
+ storage_class_req, _ = self.get_requirement("http://arvados.org/cwl#OutputStorageClass")
+ if storage_class_req and storage_class_req.get("intermediateStorageClass"):
+ container_request["output_storage_classes"] = aslist(storage_class_req["intermediateStorageClass"])
+ else:
+ container_request["output_storage_classes"] = runtimeContext.intermediate_storage_classes.strip().split(",")
+
if self.timelimit is not None and self.timelimit > 0:
scheduling_parameters["max_run_time"] = self.timelimit
if runtimeContext.storage_classes != "default":
command.append("--storage-classes=" + runtimeContext.storage_classes)
+ if runtimeContext.intermediate_storage_classes != "default":
+ command.append("--intermediate-storage-classes=" + runtimeContext.intermediate_storage_classes)
+
if self.on_error:
command.append("--on-error=" + self.on_error)
if runtimeContext.project_uuid:
cluster_target = runtimeContext.submit_runner_cluster or arvrunner.api._rootDesc["uuidPrefix"]
if not runtimeContext.project_uuid.startswith(cluster_target):
- raise WorkflowException("Project uuid '%s' must be for target cluster '%s'" % (runtimeContext.project_uuid, cluster_target))
+ raise WorkflowException("Project uuid '%s' should start with id of target cluster '%s'" % (runtimeContext.project_uuid, cluster_target))
+
try:
- arvrunner.api.groups().get(uuid=runtimeContext.project_uuid).execute()
+ if runtimeContext.project_uuid[5:12] == '-tpzed-':
+ arvrunner.api.users().get(uuid=runtimeContext.project_uuid).execute()
+ else:
+ proj = arvrunner.api.groups().get(uuid=runtimeContext.project_uuid).execute()
+ if proj["group_class"] != "project":
+ raise Exception("not a project, group_class is '%s'" % (proj["group_class"]))
except Exception as e:
raise WorkflowException("Invalid project uuid '%s': %s" % (runtimeContext.project_uuid, e))
self.wait = True
self.cwl_runner_job = None
self.storage_classes = "default"
+ self.intermediate_storage_classes = "default"
self.current_container = None
self.http_timeout = 300
self.submit_runner_cluster = None
from ._version import __version__
from cwltool.process import shortname, UnsupportedRequirement, use_custom_schema
-from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing, visit_class
+from cwltool.utils import adjustFileObjs, adjustDirObjs, get_listing, visit_class, aslist
from cwltool.command_line_tool import compute_checksums
from cwltool.load_tool import load_tool
if runtimeContext.submit_request_uuid and self.work_api != "containers":
raise Exception("--submit-request-uuid requires containers API, but using '{}' api".format(self.work_api))
+ default_storage_classes = ",".join([k for k,v in self.api.config()["StorageClasses"].items() if v.get("Default") is True])
+ if runtimeContext.storage_classes == "default":
+ runtimeContext.storage_classes = default_storage_classes
+ if runtimeContext.intermediate_storage_classes == "default":
+ runtimeContext.intermediate_storage_classes = default_storage_classes
+
if not runtimeContext.name:
runtimeContext.name = self.name = updated_tool.tool.get("label") or updated_tool.metadata.get("label") or os.path.basename(updated_tool.tool["id"])
if self.output_tags is None:
self.output_tags = ""
- storage_classes = runtimeContext.storage_classes.strip().split(",")
+ storage_classes = ""
+ storage_class_req, _ = tool.get_requirement("http://arvados.org/cwl#OutputStorageClass")
+ if storage_class_req and storage_class_req.get("finalStorageClass"):
+ storage_classes = aslist(storage_class_req["finalStorageClass"])
+ else:
+ storage_classes = runtimeContext.storage_classes.strip().split(",")
+
self.final_output, self.final_output_collection = self.make_output_collection(self.output_name, storage_classes, self.output_tags, self.final_output)
self.set_crunch_output()
fpm_depends+=(nodejs)
case "$TARGET" in
- ubuntu1604)
- fpm_depends+=(libcurl3-gnutls)
- ;;
debian* | ubuntu*)
fpm_depends+=(libcurl3-gnutls python3-distutils)
;;
'cwd': '/var/spool/cwl',
'scheduling_parameters': {},
'properties': {},
- 'secret_mounts': {}
+ 'secret_mounts': {},
+ 'output_storage_classes': ["default"]
}))
# The test passes some fields in builder.resources
'partitions': ['blurb']
},
'properties': {},
- 'secret_mounts': {}
+ 'secret_mounts': {},
+ 'output_storage_classes': ["default"]
}
call_body = call_kwargs.get('body', None)
'scheduling_parameters': {
},
'properties': {},
- 'secret_mounts': {}
+ 'secret_mounts': {},
+ 'output_storage_classes': ["default"]
}
call_body = call_kwargs.get('body', None)
'cwd': '/var/spool/cwl',
'scheduling_parameters': {},
'properties': {},
- 'secret_mounts': {}
+ 'secret_mounts': {},
+ 'output_storage_classes': ["default"]
}))
@mock.patch("arvados.collection.Collection")
'cwd': '/var/spool/cwl',
'scheduling_parameters': {},
'properties': {},
- 'secret_mounts': {}
+ 'secret_mounts': {},
+ 'output_storage_classes': ["default"]
}))
# The test passes no builder.resources
"content": "username: user\npassword: blorp\n",
"kind": "text"
}
- }
+ },
+ 'output_storage_classes': ["default"]
}))
# The test passes no builder.resources
self.assertEqual(42, kwargs['body']['scheduling_parameters'].get('max_run_time'))
+ # The test passes no builder.resources
+ # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
+ @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
+ def test_setting_storage_class(self, keepdocker):
+ arv_docker_clear_cache()
+
+ runner = mock.MagicMock()
+ runner.ignore_docker_for_reuse = False
+ runner.intermediate_output_ttl = 0
+ runner.secret_store = cwltool.secrets.SecretStore()
+
+ keepdocker.return_value = [("zzzzz-4zz18-zzzzzzzzzzzzzz3", "")]
+ runner.api.collections().get().execute.return_value = {
+ "portable_data_hash": "99999999999999999999999999999993+99"}
+
+ tool = cmap({
+ "inputs": [],
+ "outputs": [],
+ "baseCommand": "ls",
+ "arguments": [{"valueFrom": "$(runtime.outdir)"}],
+ "id": "#",
+ "class": "CommandLineTool",
+ "hints": [
+ {
+ "class": "http://arvados.org/cwl#OutputStorageClass",
+ "finalStorageClass": ["baz_sc", "qux_sc"],
+ "intermediateStorageClass": ["foo_sc", "bar_sc"]
+ }
+ ]
+ })
+
+ loadingContext, runtimeContext = self.helper(runner, True)
+
+ arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, loadingContext)
+ arvtool.formatgraph = None
+
+ for j in arvtool.job({}, mock.MagicMock(), runtimeContext):
+ j.run(runtimeContext)
+ runner.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher({
+ 'environment': {
+ 'HOME': '/var/spool/cwl',
+ 'TMPDIR': '/tmp'
+ },
+ 'name': 'test_run_True',
+ 'runtime_constraints': {
+ 'vcpus': 1,
+ 'ram': 1073741824
+ },
+ 'use_existing': True,
+ 'priority': 500,
+ 'mounts': {
+ '/tmp': {'kind': 'tmp',
+ "capacity": 1073741824
+ },
+ '/var/spool/cwl': {'kind': 'tmp',
+ "capacity": 1073741824 }
+ },
+ 'state': 'Committed',
+ 'output_name': 'Output for step test_run_True',
+ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+ 'output_path': '/var/spool/cwl',
+ 'output_ttl': 0,
+ 'container_image': '99999999999999999999999999999993+99',
+ 'command': ['ls', '/var/spool/cwl'],
+ 'cwd': '/var/spool/cwl',
+ 'scheduling_parameters': {},
+ 'properties': {},
+ 'secret_mounts': {},
+ 'output_storage_classes': ["foo_sc", "bar_sc"]
+ }))
+
+
class TestWorkflow(unittest.TestCase):
def setUp(self):
cwltool.process._names = set()
"scheduling_parameters": {},
"secret_mounts": {},
"state": "Committed",
- "use_existing": True
+ "use_existing": True,
+ 'output_storage_classes': ["default"]
}))
mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)])
mockc.open().__enter__().write.assert_has_calls([mock.call(
],
'use_existing': True,
'output_name': u'Output for step echo-subwf',
- 'cwd': '/var/spool/cwl'
+ 'cwd': '/var/spool/cwl',
+ 'output_storage_classes': ["default"]
}))
def test_default_work_api(self):
stubs.api.containers().current().execute.return_value = {
"uuid": stubs.fake_container_uuid,
}
+ stubs.api.config()["StorageClasses"].items.return_value = {
+ "default": {
+ "Default": True
+ }
+ }.items()
class CollectionExecute(object):
def __init__(self, exe):
cwltool.process._names = set()
arvados_cwl.arvdocker.arv_docker_clear_cache()
- @stubs
- def test_error_when_multiple_storage_classes_specified(self, stubs):
- storage_classes = "foo,bar"
- exited = arvados_cwl.main(
- ["--debug", "--storage-classes", storage_classes,
- "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
- sys.stdin, sys.stderr, api_client=stubs.api)
- self.assertEqual(exited, 1)
@mock.patch("time.sleep")
@stubs
stubs.expect_container_request_uuid + '\n')
self.assertEqual(exited, 0)
+ @stubs
+ def test_submit_multiple_storage_classes(self, stubs):
+ exited = arvados_cwl.main(
+ ["--debug", "--submit", "--no-wait", "--api=containers", "--storage-classes=foo,bar", "--intermediate-storage-classes=baz",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
+ '--no-log-timestamps', '--disable-validate', '--disable-color',
+ '--eval-timeout=20', '--thread-count=0',
+ '--enable-reuse', "--collection-cache-size=256", "--debug",
+ "--storage-classes=foo,bar", "--intermediate-storage-classes=baz", '--on-error=continue',
+ '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json']
+
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(stubs.capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+ self.assertEqual(exited, 0)
+
@mock.patch("cwltool.task_queue.TaskQueue")
@mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
@mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
make_output.assert_called_with(u'Output of submit_wf.cwl', ['default'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
self.assertEqual(exited, 0)
+ @mock.patch("cwltool.task_queue.TaskQueue")
+ @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
+ @mock.patch("arvados_cwl.executor.ArvCwlExecutor.make_output_collection")
+ @stubs
+ def test_storage_class_hint_to_make_output_collection(self, stubs, make_output, job, tq):
+ final_output_c = arvados.collection.Collection()
+ make_output.return_value = ({},final_output_c)
+
+ def set_final_output(job_order, output_callback, runtimeContext):
+ output_callback("zzzzz-4zz18-zzzzzzzzzzzzzzzz", "success")
+ return []
+ job.side_effect = set_final_output
+
+ exited = arvados_cwl.main(
+ ["--debug", "--local",
+ "tests/wf/submit_storage_class_wf.cwl", "tests/submit_test_job.json"],
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
+ make_output.assert_called_with(u'Output of submit_storage_class_wf.cwl', ['foo', 'bar'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
+ self.assertEqual(exited, 0)
+
@stubs
def test_submit_container_output_ttl(self, stubs):
exited = arvados_cwl.main(
@stubs
def test_submit_container_project(self, stubs):
project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ stubs.api.groups().get().execute.return_value = {"group_class": "project"}
exited = arvados_cwl.main(
["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid="+project_uuid,
"tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
@stubs
def test_submit_validate_project_uuid(self, stubs):
+ # Fails with bad cluster prefix
exited = arvados_cwl.main(
["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzb-j7d0g-zzzzzzzzzzzzzzz",
"tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
self.assertEqual(exited, 1)
+ # Project lookup fails
stubs.api.groups().get().execute.side_effect = Exception("Bad project")
exited = arvados_cwl.main(
["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzz-j7d0g-zzzzzzzzzzzzzzx",
stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
self.assertEqual(exited, 1)
+ # It should work this time because it is looking up a user (only the group lookup is stubbed out to fail)
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid=zzzzz-tpzed-zzzzzzzzzzzzzzx",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ self.assertEqual(exited, 0)
+
+
@mock.patch("arvados.collection.CollectionReader")
@stubs
def test_submit_uuid_inputs(self, stubs, collectionReader):
@stubs
def test_create(self, stubs):
project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ stubs.api.groups().get().execute.return_value = {"group_class": "project"}
exited = arvados_cwl.main(
["--create-workflow", "--debug",
@stubs
def test_create_name(self, stubs):
project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ stubs.api.groups().get().execute.return_value = {"group_class": "project"}
exited = arvados_cwl.main(
["--create-workflow", "--debug",
@stubs
def test_create_collection_per_tool(self, stubs):
project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ stubs.api.groups().get().execute.return_value = {"group_class": "project"}
exited = arvados_cwl.main(
["--create-workflow", "--debug",
@stubs
def test_create_with_imports(self, stubs):
project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ stubs.api.groups().get().execute.return_value = {"group_class": "project"}
exited = arvados_cwl.main(
["--create-workflow", "--debug",
@stubs
def test_create_with_no_input(self, stubs):
project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ stubs.api.groups().get().execute.return_value = {"group_class": "project"}
exited = arvados_cwl.main(
["--create-workflow", "--debug",
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Test case for arvados-cwl-runner
+#
+# Used to test whether scanning a workflow file for dependencies
+# (e.g. submit_tool.cwl) and uploading to Keep works as intended.
+
+class: Workflow
+cwlVersion: v1.0
+$namespaces:
+ arv: "http://arvados.org/cwl#"
+hints:
+ arv:OutputStorageClass:
+ finalStorageClass: [foo, bar]
+inputs:
+ - id: x
+ type: File
+ - id: y
+ type: Directory
+ - id: z
+ type: Directory
+outputs: []
+steps:
+ - id: step1
+ in:
+ - { id: x, source: "#x" }
+ out: []
+ run: ../tool/submit_tool.cwl
package arvados
import (
- "bufio"
+ "bytes"
"crypto/md5"
"fmt"
"regexp"
- "strings"
"time"
"git.arvados.org/arvados.git/sdk/go/blockdigest"
//
// Zero-length blocks are not included.
func (c *Collection) SizedDigests() ([]SizedDigest, error) {
- manifestText := c.ManifestText
- if manifestText == "" {
- manifestText = c.UnsignedManifestText
+ manifestText := []byte(c.ManifestText)
+ if len(manifestText) == 0 {
+ manifestText = []byte(c.UnsignedManifestText)
}
- if manifestText == "" && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
+ if len(manifestText) == 0 && c.PortableDataHash != "d41d8cd98f00b204e9800998ecf8427e+0" {
// TODO: Check more subtle forms of corruption, too
return nil, fmt.Errorf("manifest is missing")
}
- var sds []SizedDigest
- scanner := bufio.NewScanner(strings.NewReader(manifestText))
- scanner.Buffer(make([]byte, 1048576), len(manifestText))
- for scanner.Scan() {
- line := scanner.Text()
- tokens := strings.Split(line, " ")
+ sds := make([]SizedDigest, 0, len(manifestText)/40)
+ for _, line := range bytes.Split(manifestText, []byte{'\n'}) {
+ if len(line) == 0 {
+ continue
+ }
+ tokens := bytes.Split(line, []byte{' '})
if len(tokens) < 3 {
return nil, fmt.Errorf("Invalid stream (<3 tokens): %q", line)
}
for _, token := range tokens[1:] {
- if !blockdigest.LocatorPattern.MatchString(token) {
+ if !blockdigest.LocatorPattern.Match(token) {
// FIXME: ensure it's a file token
break
}
- if strings.HasPrefix(token, "d41d8cd98f00b204e9800998ecf8427e+0") {
+ if bytes.HasPrefix(token, []byte("d41d8cd98f00b204e9800998ecf8427e+0")) {
// Exclude "empty block" placeholder
continue
}
// FIXME: shouldn't assume 32 char hash
- if i := strings.IndexRune(token[33:], '+'); i >= 0 {
+ if i := bytes.IndexRune(token[33:], '+'); i >= 0 {
token = token[:33+i]
}
- sds = append(sds, SizedDigest(token))
+ sds = append(sds, SizedDigest(string(token)))
}
}
- return sds, scanner.Err()
+ return sds, nil
}
type CollectionList struct {
BalanceCollectionBatch int
BalanceCollectionBuffers int
BalanceTimeout Duration
+ BalanceUpdateLimit int
WebDAVCache WebDAVCacheConfig
NewUserNotificationRecipients StringSet
NewUsersAreActive bool
UserNotifierEmailFrom string
+ UserNotifierEmailBcc StringSet
UserProfileNotificationAddress string
PreferDomainForUsername string
UserSetupMailText string
}
- Volumes map[string]Volume
- Workbench struct {
+ StorageClasses map[string]StorageClassConfig
+ Volumes map[string]Volume
+ Workbench struct {
ActivationContactLink string
APIClientConnectTimeout Duration
APIClientReceiveTimeout Duration
}
}
+type StorageClassConfig struct {
+ Default bool
+ Priority int
+}
+
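(Aside, illustrative only: the Rails changes later in this patch derive DefaultStorageClasses from this same map, collecting every class marked Default and falling back to ["default"] when none is. A self-contained sketch of that derivation, with hypothetical class names, is below.)

package main

import "fmt"

// Mirrors the StorageClassConfig type added above (illustrative copy).
type StorageClassConfig struct {
	Default  bool
	Priority int
}

func main() {
	// Hypothetical cluster configuration: two storage classes, one marked Default.
	storageClasses := map[string]StorageClassConfig{
		"default": {Default: true, Priority: 0},
		"archive": {Default: false, Priority: 10},
	}
	// Collect every class with Default set; fall back to ["default"] if none.
	var defaults []string
	for name, cfg := range storageClasses {
		if cfg.Default {
			defaults = append(defaults, name)
		}
	}
	if len(defaults) == 0 {
		defaults = []string{"default"}
	}
	fmt.Println(defaults) // [default]
}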
type Volume struct {
AccessViaHosts map[URL]VolumeAccess
ReadOnly bool
Composer Service
Controller Service
DispatchCloud Service
+ DispatchLSF Service
GitHTTP Service
GitSSH Service
Health Service
AssignNodeHostname string
}
}
+ LSF struct {
+ BsubSudoUser string
+ BsubArgumentsList []string
+ }
}
type CloudVMsConfig struct {
ServiceNameRailsAPI ServiceName = "arvados-api-server"
ServiceNameController ServiceName = "arvados-controller"
ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
+ ServiceNameDispatchLSF ServiceName = "arvados-dispatch-lsf"
ServiceNameHealth ServiceName = "arvados-health"
ServiceNameWorkbench1 ServiceName = "arvados-workbench1"
ServiceNameWorkbench2 ServiceName = "arvados-workbench2"
ServiceNameRailsAPI: svcs.RailsAPI,
ServiceNameController: svcs.Controller,
ServiceNameDispatchCloud: svcs.DispatchCloud,
+ ServiceNameDispatchLSF: svcs.DispatchLSF,
ServiceNameHealth: svcs.Health,
ServiceNameWorkbench1: svcs.Workbench1,
ServiceNameWorkbench2: svcs.Workbench2,
GatewayAddress string `json:"gateway_address"`
InteractiveSessionStarted bool `json:"interactive_session_started"`
OutputStorageClasses []string `json:"output_storage_classes"`
+ RuntimeUserUUID string `json:"runtime_user_uuid"`
+ RuntimeAuthScopes []string `json:"runtime_auth_scopes"`
+ RuntimeToken string `json:"runtime_token"`
}
// ContainerRequest is an arvados#container_request resource.
}
}
node, err = func() (inode, error) {
- node.RLock()
- defer node.RUnlock()
+ node.Lock()
+ defer node.Unlock()
return node.Child(name, nil)
}()
if node == nil || err != nil {
UserAgreementPDH = "b519d9cb706a29fc7ea24dbea2f05851+93"
HelloWorldPdh = "55713e6a34081eb03609e7ad5fcad129+62"
- MultilevelCollection1 = "zzzzz-4zz18-pyw8yp9g3pr7irn"
+ MultilevelCollection1 = "zzzzz-4zz18-pyw8yp9g3pr7irn"
+ StorageClassesDesiredDefaultConfirmedDefault = "zzzzz-4zz18-3t236wr12769tga"
+ StorageClassesDesiredArchiveConfirmedDefault = "zzzzz-4zz18-3t236wr12769qqa"
+ EmptyCollectionUUID = "zzzzz-4zz18-gs9ooj1h9sd5mde"
AProjectUUID = "zzzzz-j7d0g-v955i6s2oi1cbso"
ASubprojectUUID = "zzzzz-j7d0g-axqo7eu9pwvna1x"
QueuedContainerRequestUUID = "zzzzz-xvhdp-cr4queuedcontnr"
QueuedContainerUUID = "zzzzz-dz642-queuedcontainer"
+ LockedContainerUUID = "zzzzz-dz642-lockedcontainer"
+
RunningContainerUUID = "zzzzz-dz642-runningcontainr"
CompletedContainerUUID = "zzzzz-dz642-compltcontainer"
package dispatch
import (
+ "bytes"
"context"
"fmt"
"sync"
"time"
+ "git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"github.com/sirupsen/logrus"
// running, and return.
//
// The DispatchFunc should not return until the container is finished.
-type DispatchFunc func(*Dispatcher, arvados.Container, <-chan arvados.Container)
+type DispatchFunc func(*Dispatcher, arvados.Container, <-chan arvados.Container) error
// Run watches the API server's queue for containers that are either
// ready to run and available to lock, or are already locked by this
}
tracker.updates <- c
go func() {
- d.RunContainer(d, c, tracker.updates)
- // RunContainer blocks for the lifetime of the container. When
- // it returns, the tracker should delete itself.
+ err := d.RunContainer(d, c, tracker.updates)
+ if err != nil {
+ text := fmt.Sprintf("Error running container %s: %s", c.UUID, err)
+ if err, ok := err.(dispatchcloud.ConstraintsNotSatisfiableError); ok {
+ var logBuf bytes.Buffer
+ fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", c.UUID, err)
+ if len(err.AvailableTypes) == 0 {
+ fmt.Fprint(&logBuf, "No instance types are configured.\n")
+ } else {
+ fmt.Fprint(&logBuf, "Available instance types:\n")
+ for _, t := range err.AvailableTypes {
+ fmt.Fprintf(&logBuf,
+ "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
+ t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price)
+ }
+ }
+ text = logBuf.String()
+ d.UpdateState(c.UUID, Cancelled)
+ }
+ d.Logger.Printf("%s", text)
+ lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+ "object_uuid": c.UUID,
+ "event_type": "dispatch",
+ "properties": map[string]string{"text": text}}}
+ d.Arv.Create("logs", lr, nil)
+ d.Unlock(c.UUID)
+ }
+
d.mtx.Lock()
delete(d.trackers, c.UUID)
d.mtx.Unlock()
time.AfterFunc(10*time.Second, func() { done <- false })
d := &Dispatcher{
Arv: arv,
- RunContainer: func(dsp *Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ RunContainer: func(dsp *Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
for ctr := range status {
c.Logf("%#v", ctr)
}
done <- true
+ return nil
},
}
d.TrackContainer(arvadostest.QueuedContainerUUID)
for _, svc := range []*arvados.Service{
&svcs.Controller,
&svcs.DispatchCloud,
+ &svcs.DispatchLSF,
&svcs.Keepbalance,
&svcs.Keepproxy,
&svcs.Keepstore,
}
req.Header.Set(HeaderRequestID, gen.Next())
}
+ w.Header().Set("X-Request-Id", req.Header.Get("X-Request-Id"))
h.ServeHTTP(w, req)
})
}
multipleResponseError
}
-type InsufficientReplicasError error
+type InsufficientReplicasError struct{ error }
-type OversizeBlockError error
+type OversizeBlockError struct{ error }
-var ErrOversizeBlock = OversizeBlockError(errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")"))
+var ErrOversizeBlock = OversizeBlockError{error: errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")")}
var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
var InvalidLocatorError = errors.New("Invalid locator")
"bytes"
"context"
"crypto/md5"
- "errors"
"fmt"
"io"
"io/ioutil"
_, replicas, err := kc.PutB([]byte("foo"))
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 1)
c.Check(<-st.handled, Equals, ks1[0].url)
}
_, replicas, err := kc.PutB([]byte("foo"))
<-st.handled
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 2)
}
_, replicas, err := kc.PutB([]byte("foo"))
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 1)
c.Check(<-st.handled, Equals, localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", 0)])
_, replicas, err := kc.PutB([]byte("foo"))
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 0)
}
msg += resp + "; "
}
msg = msg[:len(msg)-2]
- return resp, InsufficientReplicasError(errors.New(msg))
+ return resp, InsufficientReplicasError{error: errors.New(msg)}
}
break
}
copy_opts.add_argument(
'--project-uuid', dest='project_uuid',
help='The UUID of the project at the destination to which the collection or workflow should be copied.')
+ copy_opts.add_argument(
+ '--storage-classes', dest='storage_classes',
+ help='Comma separated list of storage classes to be used when saving data to the destination Arvados instance.')
copy_opts.add_argument(
'object_uuid',
parents=[copy_opts, arv_cmd.retry_opt])
args = parser.parse_args()
+ if args.storage_classes:
+ args.storage_classes = [x for x in args.storage_classes.strip().replace(' ', '').split(',') if x]
+
if args.verbose:
logger.setLevel(logging.DEBUG)
else:
if not body["name"]:
body['name'] = "copied from " + collection_uuid
+ if args.storage_classes:
+ body['storage_classes_desired'] = args.storage_classes
+
body['owner_uuid'] = args.project_uuid
dst_collection = dst.collections().create(body=body, ensure_unique_name=True).execute(num_retries=args.retries)
if progress_writer:
progress_writer.report(obj_uuid, bytes_written, bytes_expected)
data = src_keep.get(word)
- dst_locator = dst_keep.put(data)
+ dst_locator = dst_keep.put(data, classes=(args.storage_classes or []))
dst_locators[blockhash] = dst_locator
bytes_written += loc.size
dst_manifest.write(' ')
arguments = [i for i in arguments if i not in (args.image, args.tag, image_repo_tag)]
put_args = keepdocker_parser.parse_known_args(arguments)[1]
+ # Don't fail when cached manifest is invalid, just ignore the cache.
+ put_args += ['--batch']
+
if args.name is None:
put_args += ['--name', collection_name]
still be displayed.)
""")
+run_opts.add_argument('--batch', action='store_true', default=False,
+ help="""
+Retries with '--no-resume --no-cache' if cached state contains invalid/expired
+block signatures.
+""")
+
_group = run_opts.add_mutually_exclusive_group()
_group.add_argument('--resume', action='store_true', default=True,
help="""
}
def __init__(self, paths, resume=True, use_cache=True, reporter=None,
- name=None, owner_uuid=None, api_client=None,
+ name=None, owner_uuid=None, api_client=None, batch_mode=False,
ensure_unique_name=False, num_retries=None,
put_threads=None, replication_desired=None, filename=None,
update_time=60.0, update_collection=None, storage_classes=None,
self.paths = paths
self.resume = resume
self.use_cache = use_cache
+ self.batch_mode = batch_mode
self.update = False
self.reporter = reporter
# This will set to 0 before start counting, if no special files are going
# No cache file, set empty state
self._state = copy.deepcopy(self.EMPTY_STATE)
if not self._cached_manifest_valid():
- raise ResumeCacheInvalidError()
+ if not self.batch_mode:
+ raise ResumeCacheInvalidError()
+ else:
+ self.logger.info("Invalid signatures on cache file '{}' while being run in 'batch mode' -- continuing anyways.".format(self._cache_file.name))
+ self.use_cache = False # Don't overwrite preexisting cache file.
+ self._state = copy.deepcopy(self.EMPTY_STATE)
# Load the previous manifest so we can check if files were modified remotely.
self._local_collection = arvados.collection.Collection(
self._state['manifest'],
replication_desired=self.replication_desired,
- storage_classes_desired=(self.storage_classes or ['default']),
+ storage_classes_desired=self.storage_classes,
put_threads=self.put_threads,
api_client=self._api_client,
num_retries=self.num_retries)
writer = ArvPutUploadJob(paths = args.paths,
resume = args.resume,
use_cache = args.use_cache,
+ batch_mode= args.batch,
filename = args.filename,
reporter = reporter,
api_client = api_client,
" or been created with another Arvados user's credentials.",
" Switch user or use one of the following options to restart upload:",
" --no-resume to start a new resume cache.",
- " --no-cache to disable resume cache."]))
+ " --no-cache to disable resume cache.",
+ " --batch to ignore the resume cache if invalid."]))
sys.exit(1)
except (CollectionUpdateError, PathDoesNotExistError) as error:
logger.error("\n".join([
with c.open('foo', 'wt') as f:
f.write('foo')
c.save_new("arv-copy foo collection", owner_uuid=src_proj)
+ coll_record = api.collections().get(uuid=c.manifest_locator()).execute()
+ assert coll_record['storage_classes_desired'] == ['default']
dest_proj = api.groups().create(body={"group": {"name": "arv-copy dest project", "group_class": "project"}}).execute()["uuid"]
assert len(contents["items"]) == 0
try:
- self.run_copy(["--project-uuid", dest_proj, src_proj])
+ self.run_copy(["--project-uuid", dest_proj, "--storage-classes", "foo", src_proj])
except SystemExit as e:
assert e.code == 0
assert contents["items"][0]["uuid"] != c.manifest_locator()
assert contents["items"][0]["name"] == "arv-copy foo collection"
assert contents["items"][0]["portable_data_hash"] == c.portable_data_hash()
+ assert contents["items"][0]["storage_classes_desired"] == ["foo"]
finally:
os.environ['HOME'] = home_was
r'INFO: Cache expired, starting from scratch.*')
self.assertEqual(p.returncode, 0)
- def test_invalid_signature_invalidates_cache(self):
- self.authorize_with('active')
- tmpdir = self.make_tmpdir()
- with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
- f.write('foo')
- # Upload a directory and get the cache file name
- p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=self.ENVIRON)
- (_, err) = p.communicate()
- self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
- self.assertEqual(p.returncode, 0)
- cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
- err.decode()).groups()[0]
- self.assertTrue(os.path.isfile(cache_filepath))
- # Load the cache file contents and modify the manifest to simulate
- # an invalid access token
- with open(cache_filepath, 'r') as c:
- cache = json.load(c)
- self.assertRegex(cache['manifest'], r'\+A\S+\@')
- cache['manifest'] = re.sub(
- r'\+A.*\@',
- "+Aabcdef0123456789abcdef0123456789abcdef01@",
- cache['manifest'])
- with open(cache_filepath, 'w') as c:
- c.write(json.dumps(cache))
- # Re-run the upload and expect to get an invalid cache message
- p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=self.ENVIRON)
- (_, err) = p.communicate()
- self.assertRegex(
- err.decode(),
- r'ERROR: arv-put: Resume cache contains invalid signature.*')
- self.assertEqual(p.returncode, 1)
+ def test_invalid_signature_in_cache(self):
+ for batch_mode in [False, True]:
+ self.authorize_with('active')
+ tmpdir = self.make_tmpdir()
+ with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
+ f.write('foo')
+ # Upload a directory and get the cache file name
+ arv_put_args = [tmpdir]
+ if batch_mode:
+ arv_put_args = ['--batch'] + arv_put_args
+ p = subprocess.Popen([sys.executable, arv_put.__file__] + arv_put_args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (_, err) = p.communicate()
+ self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
+ self.assertEqual(p.returncode, 0)
+ cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
+ err.decode()).groups()[0]
+ self.assertTrue(os.path.isfile(cache_filepath))
+ # Load the cache file contents and modify the manifest to simulate
+ # an invalid access token
+ with open(cache_filepath, 'r') as c:
+ cache = json.load(c)
+ self.assertRegex(cache['manifest'], r'\+A\S+\@')
+ cache['manifest'] = re.sub(
+ r'\+A.*\@',
+ "+Aabcdef0123456789abcdef0123456789abcdef01@",
+ cache['manifest'])
+ with open(cache_filepath, 'w') as c:
+ c.write(json.dumps(cache))
+ # Re-run the upload and expect to get an invalid cache message
+ p = subprocess.Popen([sys.executable, arv_put.__file__] + arv_put_args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=self.ENVIRON)
+ (_, err) = p.communicate()
+ if not batch_mode:
+ self.assertRegex(
+ err.decode(),
+ r'ERROR: arv-put: Resume cache contains invalid signature.*')
+ self.assertEqual(p.returncode, 1)
+ else:
+ self.assertRegex(
+ err.decode(),
+ r'Invalid signatures on cache file \'.*\' while being run in \'batch mode\' -- continuing anyways.*')
+ self.assertEqual(p.returncode, 0)
def test_single_expired_signature_reuploads_file(self):
self.authorize_with('active')
end
err[:errors] ||= args
err[:errors].map! do |err|
- err += " (" + Thread.current[:request_id] + ")"
+ err += " (#{request.request_id})"
end
err[:error_token] = [Time.now.utc.to_i, "%08x" % rand(16 ** 8)].join("+")
status = err.delete(:status) || 422
end
def set_current_request_id
- req_id = request.headers['X-Request-Id']
- if !req_id || req_id.length < 1 || req_id.length > 1024
- # Client-supplied ID is either missing or too long to be
- # considered friendly.
- req_id = "req-" + Random::DEFAULT.rand(2**128).to_s(36)[0..19]
- end
- response.headers['X-Request-Id'] = Thread.current[:request_id] = req_id
- Rails.logger.tagged(req_id) do
+ Rails.logger.tagged(request.request_id) do
yield
end
- Thread.current[:request_id] = nil
end
def append_info_to_payload(payload)
def account_is_setup(user)
@user = user
- mail(to: user.email, subject: 'Welcome to Arvados - account enabled')
+ if not Rails.configuration.Users.UserNotifierEmailBcc.empty? then
+ @bcc = Rails.configuration.Users.UserNotifierEmailBcc.keys
+ mail(to: user.email, subject: 'Welcome to Arvados - account enabled', bcc: @bcc)
+ else
+ mail(to: user.email, subject: 'Welcome to Arvados - account enabled')
+ end
end
end
# Posgresql JSONB columns should NOT be declared as serialized, Rails 5
# already know how to properly treat them.
attribute :properties, :jsonbHash, default: {}
- attribute :storage_classes_desired, :jsonbArray, default: ["default"]
+ attribute :storage_classes_desired, :jsonbArray, default: lambda { Rails.configuration.DefaultStorageClasses }
attribute :storage_classes_confirmed, :jsonbArray, default: []
before_validation :default_empty_manifest
def strip_signatures_and_update_replication_confirmed
if self.manifest_text_changed?
in_old_manifest = {}
- if not self.replication_confirmed.nil?
+ # manifest_text_was could be nil when dealing with a freshly created snapshot,
+ # so we skip this case because there was no real manifest change. (Bug #18005)
+ if (not self.replication_confirmed.nil?) and (not self.manifest_text_was.nil?)
self.class.each_manifest_locator(manifest_text_was) do |match|
in_old_manifest[match[1]] = true
end
# validation on empty desired storage classes return an error.
def default_storage_classes
if self.storage_classes_desired.nil? || self.storage_classes_desired.empty?
- self.storage_classes_desired = ["default"]
+ self.storage_classes_desired = Rails.configuration.DefaultStorageClasses
end
self.storage_classes_confirmed ||= []
end
# already know how to properly treat them.
attribute :secret_mounts, :jsonbHash, default: {}
attribute :runtime_status, :jsonbHash, default: {}
- attribute :runtime_auth_scopes, :jsonbHash, default: {}
- attribute :output_storage_classes, :jsonbArray, default: ["default"]
+ attribute :runtime_auth_scopes, :jsonbArray, default: []
+ attribute :output_storage_classes, :jsonbArray, default: lambda { Rails.configuration.DefaultStorageClasses }
serialize :environment, Hash
serialize :mounts, Hash
# already know how to properly treat them.
attribute :properties, :jsonbHash, default: {}
attribute :secret_mounts, :jsonbHash, default: {}
- attribute :output_storage_classes, :jsonbArray, default: ["default"]
+ attribute :output_storage_classes, :jsonbArray, default: lambda { Rails.configuration.DefaultStorageClasses }
serialize :environment, Hash
serialize :mounts, Hash
arvcfg.declare_config "Users.AdminNotifierEmailFrom", String, :admin_notifier_email_from
arvcfg.declare_config "Users.EmailSubjectPrefix", String, :email_subject_prefix
arvcfg.declare_config "Users.UserNotifierEmailFrom", String, :user_notifier_email_from
+arvcfg.declare_config "Users.UserNotifierEmailBcc", Hash
arvcfg.declare_config "Users.NewUserNotificationRecipients", Hash, :new_user_notification_recipients, ->(cfg, k, v) { arrayToHash cfg, "Users.NewUserNotificationRecipients", v }
arvcfg.declare_config "Users.NewInactiveUserNotificationRecipients", Hash, :new_inactive_user_notification_recipients, method(:arrayToHash)
arvcfg.declare_config "Login.LoginCluster", String
ConfigLoader.set_cfg cfg, "RemoteClusters", h
}
arvcfg.declare_config "RemoteClusters.*.Proxy", Boolean, :remote_hosts_via_dns
+arvcfg.declare_config "StorageClasses", Hash
dbcfg = ConfigLoader.new
raise "default_trash_lifetime is %d, must be at least 86400" % Rails.configuration.Collections.DefaultTrashLifetime
end
+default_storage_classes = []
+$arvados_config["StorageClasses"].each do |cls, cfg|
+ if cfg["Default"]
+ default_storage_classes << cls
+ end
+end
+if default_storage_classes.length == 0
+ default_storage_classes = ["default"]
+end
+$arvados_config["DefaultStorageClasses"] = default_storage_classes.sort
+
#
# Special case for test database where there's no database.yml,
# because the Arvados config.yml doesn't have a concept of multiple
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+module CustomRequestId
+ def make_request_id(req_id)
+ if !req_id || req_id.length < 1 || req_id.length > 1024
+ # Client-supplied ID is either missing or too long to be
+ # considered friendly.
+ internal_request_id
+ else
+ req_id
+ end
+ end
+
+ def internal_request_id
+ "req-" + Random::DEFAULT.rand(2**128).to_s(36)[0..19]
+ end
+end
+
+class ActionDispatch::RequestId
+ # Instead of using the default UUID-like format for X-Request-Id headers,
+ # use our own.
+ prepend CustomRequestId
+end
\ No newline at end of file
namespace :db do
desc "Apply expiration policy on long lived tokens"
task fix_long_lived_tokens: :environment do
- if Rails.configuration.Login.TokenLifetime == 0
- puts("No expiration policy set on Login.TokenLifetime.")
- else
- exp_date = Time.now + Rails.configuration.Login.TokenLifetime
- puts("Setting token expiration to: #{exp_date}")
- token_count = 0
- ll_tokens.each do |auth|
- if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
- CurrentApiClientHelper.act_as_system_user do
- auth.update_attributes!(expires_at: exp_date)
- end
- token_count += 1
+ lifetime = Rails.configuration.API.MaxTokenLifetime
+ if lifetime.nil? or lifetime == 0
+ lifetime = Rails.configuration.Login.TokenLifetime
+ end
+ if lifetime.nil? or lifetime == 0
+ puts("No expiration policy set (API.MaxTokenLifetime nor Login.TokenLifetime is set), nothing to do.")
+ # abort the rake task
+ next
+ end
+ exp_date = Time.now + lifetime
+ puts("Setting token expiration to: #{exp_date}")
+ token_count = 0
+ ll_tokens(lifetime).each do |auth|
+ if auth.user.nil?
+ printf("*** WARNING, found ApiClientAuthorization with invalid user: auth id: %d, user id: %d\n", auth.id, auth.user_id)
+ # skip this token
+ next
+ end
+ if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+ CurrentApiClientHelper.act_as_system_user do
+ auth.update_attributes!(expires_at: exp_date)
end
+ token_count += 1
end
- puts("#{token_count} tokens updated.")
end
+ puts("#{token_count} tokens updated.")
end
desc "Show users with long lived tokens"
task check_long_lived_tokens: :environment do
+ lifetime = Rails.configuration.API.MaxTokenLifetime
+ if lifetime.nil? or lifetime == 0
+ lifetime = Rails.configuration.Login.TokenLifetime
+ end
+ if lifetime.nil? or lifetime == 0
+ puts("No expiration policy set (API.MaxTokenLifetime nor Login.TokenLifetime is set), nothing to do.")
+ # abort the rake task
+ next
+ end
user_ids = Set.new()
token_count = 0
- ll_tokens.each do |auth|
- if (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
+ ll_tokens(lifetime).each do |auth|
+ if auth.user.nil?
+ printf("*** WARNING, found ApiClientAuthorization with invalid user: auth id: %d, user id: %d\n", auth.id, auth.user_id)
+ # skip this token
+ next
+ end
+ if not auth.user.nil? and (auth.user.uuid =~ /-tpzed-000000000000000/).nil?
user_ids.add(auth.user_id)
token_count += 1
end
end
end
- def ll_tokens
+ def ll_tokens(lifetime)
query = ApiClientAuthorization.where(expires_at: nil)
- if Rails.configuration.Login.TokenLifetime > 0
- query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + Rails.configuration.Login.TokenLifetime))
- end
+ query = query.or(ApiClientAuthorization.where("expires_at > ?", Time.now + lifetime))
query
end
end
token_time = token.split('+', 2).first.to_i
assert_operator(token_time, :>=, @start_stamp, "error token too old")
assert_operator(token_time, :<=, now_timestamp, "error token too new")
- json_response['errors'].each do |err|
- assert_match(/req-[a-z0-9]{20}/, err, "X-Request-Id value missing on error message")
- end
end
def check_404(errmsg="Path not found")
check_error_token
end
- test "X-Request-Id header" do
- authorize_with :spectator
- get(:index)
- assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
- end
-
- # The response header is the one that gets logged, so this test also
- # ensures we log the ID supplied in the request, if any.
- test "X-Request-Id given by client" do
- authorize_with :spectator
- @request.headers['X-Request-Id'] = 'abcdefG'
- get(:index)
- assert_equal 'abcdefG', response.headers['X-Request-Id']
- end
-
- test "X-Request-Id given by client is ignored if too long" do
- authorize_with :spectator
- @request.headers['X-Request-Id'] = 'abcdefG' * 1000
- get(:index)
- assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
- end
-
['foo', '', 'FALSE', 'TRUE', nil, [true], {a:true}, '"true"'].each do |bogus|
test "bogus boolean parameter #{bogus.inspect} returns error" do
@controller = Arvados::V1::GroupsController.new
assert_nil assigns(:object)
assert_not_nil json_response['errors']
assert_response 404
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
end
end
"Unexpected new route: #{route.path.spec}")
end
end
+
+ test "X-Request-Id header" do
+ get "/", headers: auth(:spectator)
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
+ end
+
+ test "X-Request-Id header on non-existant object URL" do
+ get "/arvados/v1/container_requests/invalid",
+ params: {:format => :json}, headers: auth(:active)
+ assert_response 404
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
+ end
+
+ # The response header is the one that gets logged, so this test also
+ # ensures we log the ID supplied in the request, if any.
+ test "X-Request-Id given by client" do
+ get "/", headers: auth(:spectator).merge({'X-Request-Id': 'abcdefG'})
+ assert_equal 'abcdefG', response.headers['X-Request-Id']
+ end
+
+ test "X-Request-Id given by client is ignored if too long" do
+ authorize_with :spectator
+ long_reqId = 'abcdefG' * 1000
+ get "/", headers: auth(:spectator).merge({'X-Request-Id': long_reqId})
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
+ end
end
c.reload
assert_equal 'foobar', c.name
assert_equal 2, c.version
+ # Simulate a keep-balance run and trigger a new versionable update
+ # This tests bug #18005
+ assert_nil c.replication_confirmed
+ assert_nil c.replication_confirmed_at
+ # Updates without validations/callbacks
+ c.update_column('modified_at', fifteen_min_ago)
+ c.update_column('replication_confirmed_at', Time.now)
+ c.update_column('replication_confirmed', 2)
+ c.reload
+ assert_equal fifteen_min_ago.to_i, c.modified_at.to_i
+ assert_not_nil c.replication_confirmed_at
+ assert_not_nil c.replication_confirmed
+ # Make the versionable update
+ c.update_attributes!({'name' => 'foobarbaz'})
+ c.reload
+ assert_equal 'foobarbaz', c.name
+ assert_equal 3, c.version
end
end
end
end
+ test "storage_classes_desired default respects config" do
+ saved = Rails.configuration.DefaultStorageClasses
+ Rails.configuration.DefaultStorageClasses = ["foo"]
+ begin
+ act_as_user users(:active) do
+ c = Collection.create!
+ assert_equal ["foo"], c.storage_classes_desired
+ end
+ ensure
+ Rails.configuration.DefaultStorageClasses = saved
+ end
+ end
+
test "storage_classes_desired cannot be empty" do
act_as_user users(:active) do
c = collections(:collection_owned_by_active)
end
test "default output_storage_classes" do
- act_as_user users(:active) do
- cr = create_minimal_req!(priority: 1,
- state: ContainerRequest::Committed,
- output_name: 'foo')
- run_container(cr)
- cr.reload
- output = Collection.find_by_uuid(cr.output_uuid)
- assert_equal ["default"], output.storage_classes_desired
+ saved = Rails.configuration.DefaultStorageClasses
+ Rails.configuration.DefaultStorageClasses = ["foo"]
+ begin
+ act_as_user users(:active) do
+ cr = create_minimal_req!(priority: 1,
+ state: ContainerRequest::Committed,
+ output_name: 'foo')
+ run_container(cr)
+ cr.reload
+ output = Collection.find_by_uuid(cr.output_uuid)
+ assert_equal ["foo"], output.storage_classes_desired
+ end
+ ensure
+ Rails.configuration.DefaultStorageClasses = saved
end
end
test "account is setup" do
user = users :active
+ Rails.configuration.Users.UserNotifierEmailBcc = ConfigLoader.to_OrderedOptions({"bcc-notify@example.com"=>{},"bcc-notify2@example.com"=>{}})
Rails.configuration.Users.UserSetupMailText = %{
<% if not @user.full_name.empty? -%>
<%= @user.full_name %>,
# Test the body of the sent email contains what we expect it to
assert_equal Rails.configuration.Users.UserNotifierEmailFrom, email.from.first
+ assert_equal Rails.configuration.Users.UserNotifierEmailBcc.stringify_keys.keys, email.bcc
assert_equal user.email, email.to.first
assert_equal 'Welcome to Arvados - account enabled', email.subject
assert (email.body.to_s.include? 'Your Arvados shell account has been set up'),
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
// crunch-run terminates, mark the container as Cancelled.
func (lr *LocalRun) run(dispatcher *dispatch.Dispatcher,
container arvados.Container,
- status <-chan arvados.Container) {
+ status <-chan arvados.Container) error {
uuid := container.UUID
case lr.concurrencyLimit <- true:
break
case <-lr.ctx.Done():
- return
+ return lr.ctx.Err()
}
defer func() { <-lr.concurrencyLimit }()
}
dispatcher.Logger.Printf("finalized container %v", uuid)
+ return nil
}
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
- dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
- (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
- cancel()
+ dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) error {
+ defer cancel()
+ return (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
}
err = dispatcher.Run(ctx)
cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
- dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
- (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
- cancel()
+ dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) error {
+ defer cancel()
+ return (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
}
re := regexp.MustCompile(`(?ms).*` + expected + `.*`)
// Dispatcher service for Crunch that submits containers to the slurm queue.
import (
- "bytes"
"context"
"flag"
"fmt"
// already in the queue). Cancel the slurm job if the container's
// priority changes to zero or its state indicates it's no longer
// running.
-func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
log.Printf("Submitting container %s to slurm", ctr.UUID)
cmd := []string{disp.cluster.Containers.CrunchRunCommand}
cmd = append(cmd, disp.cluster.Containers.CrunchRunArgumentsList...)
- if err := disp.submit(ctr, cmd); err != nil {
- var text string
- switch err := err.(type) {
- case dispatchcloud.ConstraintsNotSatisfiableError:
- var logBuf bytes.Buffer
- fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", ctr.UUID, err)
- if len(err.AvailableTypes) == 0 {
- fmt.Fprint(&logBuf, "No instance types are configured.\n")
- } else {
- fmt.Fprint(&logBuf, "Available instance types:\n")
- for _, t := range err.AvailableTypes {
- fmt.Fprintf(&logBuf,
- "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
- t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price,
- )
- }
- }
- text = logBuf.String()
- disp.UpdateState(ctr.UUID, dispatch.Cancelled)
- default:
- text = fmt.Sprintf("Error submitting container %s to slurm: %s", ctr.UUID, err)
- }
- log.Print(text)
-
- lr := arvadosclient.Dict{"log": arvadosclient.Dict{
- "object_uuid": ctr.UUID,
- "event_type": "dispatch",
- "properties": map[string]string{"text": text}}}
- disp.Arv.Create("logs", lr, nil)
-
- disp.Unlock(ctr.UUID)
- return
+ err := disp.submit(ctr, cmd)
+ if err != nil {
+ return err
}
}
case dispatch.Locked:
disp.Unlock(ctr.UUID)
}
- return
+ return nil
case updated, ok := <-status:
if !ok {
log.Printf("container %s is done: cancel slurm job", ctr.UUID)
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
func (s *IntegrationSuite) integrationTest(c *C,
expectBatch [][]string,
- runContainer func(*dispatch.Dispatcher, arvados.Container)) arvados.Container {
+ runContainer func(*dispatch.Dispatcher, arvados.Container)) (arvados.Container, error) {
arvadostest.ResetEnv()
arv, err := arvadosclient.MakeArvadosClient()
ctx, cancel := context.WithCancel(context.Background())
doneRun := make(chan struct{})
+ doneDispatch := make(chan error)
s.disp.Dispatcher = &dispatch.Dispatcher{
Arv: arv,
PollPeriod: time.Second,
- RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
go func() {
runContainer(disp, ctr)
s.slurm.queue = ""
doneRun <- struct{}{}
}()
- s.disp.runContainer(disp, ctr, status)
+ err := s.disp.runContainer(disp, ctr, status)
cancel()
+ doneDispatch <- err
+ return nil
},
}
err = s.disp.Dispatcher.Run(ctx)
<-doneRun
c.Assert(err, Equals, context.Canceled)
+ errDispatch := <-doneDispatch
s.disp.sqCheck.Stop()
var container arvados.Container
err = arv.Get("containers", "zzzzz-dz642-queuedcontainer", nil, &container)
c.Check(err, IsNil)
- return container
+ return container, errDispatch
}
func (s *IntegrationSuite) TestNormal(c *C) {
s.slurm = slurmFake{queue: "zzzzz-dz642-queuedcontainer 10000 100 PENDING Resources\n"}
- container := s.integrationTest(c,
+ container, _ := s.integrationTest(c,
nil,
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
s.slurm = slurmFake{queue: "zzzzz-dz642-queuedcontainer 10000 100 PENDING Resources\n"}
readyToCancel := make(chan bool)
s.slurm.onCancel = func() { <-readyToCancel }
- container := s.integrationTest(c,
+ container, _ := s.integrationTest(c,
nil,
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
}
func (s *IntegrationSuite) TestMissingFromSqueue(c *C) {
- container := s.integrationTest(c,
+ container, _ := s.integrationTest(c,
[][]string{{
fmt.Sprintf("--job-name=%s", "zzzzz-dz642-queuedcontainer"),
fmt.Sprintf("--nice=%d", 10000),
func (s *IntegrationSuite) TestSbatchFail(c *C) {
s.slurm = slurmFake{errBatch: errors.New("something terrible happened")}
- container := s.integrationTest(c,
+ container, err := s.integrationTest(c,
[][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--nice=10000", "--no-requeue", "--mem=11445", "--cpus-per-task=4", "--tmp=45777"}},
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
dispatcher.UpdateState(container.UUID, dispatch.Complete)
})
c.Check(container.State, Equals, arvados.ContainerStateComplete)
-
- arv, err := arvadosclient.MakeArvadosClient()
- c.Assert(err, IsNil)
-
- var ll arvados.LogList
- err = arv.List("logs", arvadosclient.Dict{"filters": [][]string{
- {"object_uuid", "=", container.UUID},
- {"event_type", "=", "dispatch"},
- }}, &ll)
- c.Assert(err, IsNil)
- c.Assert(len(ll.Items), Equals, 1)
+ c.Check(err, ErrorMatches, `something terrible happened`)
}
type StubbedSuite struct {
dispatcher := dispatch.Dispatcher{
Arv: arv,
PollPeriod: time.Second,
- RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
go func() {
time.Sleep(time.Second)
disp.UpdateState(ctr.UUID, dispatch.Running)
}()
s.disp.runContainer(disp, ctr, status)
cancel()
+ return nil
},
}
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
# SPDX-License-Identifier: Apache-2.0
case "$TARGET" in
- ubuntu1604)
- fpm_depends+=()
- ;;
debian* | ubuntu*)
fpm_depends+=(python3-distutils)
;;
def on_event(self, event, collection, name, item):
if collection == self.collection:
name = self.sanitize_filename(name)
- _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
- with llfuse.lock:
- if event == arvados.collection.ADD:
- self.new_entry(name, item, self.mtime())
- elif event == arvados.collection.DEL:
- ent = self._entries[name]
- del self._entries[name]
- self.inodes.invalidate_entry(self, name)
- self.inodes.del_entry(ent)
- elif event == arvados.collection.MOD:
- if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
- self.inodes.invalidate_inode(item.fuse_entry)
- elif name in self._entries:
- self.inodes.invalidate_inode(self._entries[name])
+
+ #
+ # It's possible for another thread to have llfuse.lock and
+ # be waiting on collection.lock. Meanwhile, we released
+ # llfuse.lock earlier in the stack, but are still holding
+ # on to the collection lock, and now we need to re-acquire
+ # llfuse.lock. If we don't release the collection lock,
+ # we'll deadlock where we're holding the collection lock
+ # waiting for llfuse.lock and the other thread is holding
+ # llfuse.lock and waiting for the collection lock.
+ #
+ # The correct locking order here is to take llfuse.lock
+ # first, then the collection lock.
+ #
+ # Since collection.lock is an RLock, it might be locked
+ # multiple times, so we need to release it multiple times,
+ # keep a count, then re-lock it the correct number of
+ # times.
+ #
+ lockcount = 0
+ try:
+ while True:
+ self.collection.lock.release()
+ lockcount += 1
+ except RuntimeError:
+ pass
+
+ try:
+ with llfuse.lock:
+ with self.collection.lock:
+ if event == arvados.collection.ADD:
+ self.new_entry(name, item, self.mtime())
+ elif event == arvados.collection.DEL:
+ ent = self._entries[name]
+ del self._entries[name]
+ self.inodes.invalidate_entry(self, name)
+ self.inodes.del_entry(ent)
+ elif event == arvados.collection.MOD:
+ if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+ self.inodes.invalidate_inode(item.fuse_entry)
+ elif name in self._entries:
+ self.inodes.invalidate_inode(self._entries[name])
+ finally:
+ while lockcount > 0:
+ self.collection.lock.acquire()
+ lockcount -= 1
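# A minimal standalone sketch of the relock pattern used above, with
# generic names (outer, inner, do_work are illustrative, not Arvados
# API). The idea: fully release the re-entrant inner lock while
# counting how many times it was held, take the locks in the correct
# outer-then-inner order, then restore the original recursion depth.
#
#     held = 0
#     try:
#         while True:
#             inner.release()      # RLock: one release per prior acquire
#             held += 1
#     except RuntimeError:         # raised once the lock is no longer held
#         pass
#     try:
#         with outer:              # correct order: outer lock first...
#             with inner:          # ...then the inner lock
#                 do_work()
#     finally:
#         for _ in range(held):
#             inner.acquire()      # re-acquire to the original depth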
def populate(self, mtime):
self._mtime = mtime
def on_event(self, *args, **kwargs):
super(TmpCollectionDirectory, self).on_event(*args, **kwargs)
if self.collection_record_file:
- with llfuse.lock:
- self.collection_record_file.invalidate()
- self.inodes.invalidate_inode(self.collection_record_file)
- _logger.debug("%s invalidated collection record", self)
+
+ # See discussion in CollectionDirectoryBase.on_event
+ lockcount = 0
+ try:
+ while True:
+ self.collection.lock.release()
+ lockcount += 1
+ except RuntimeError:
+ pass
+
+ try:
+ with llfuse.lock:
+ with self.collection.lock:
+ self.collection_record_file.invalidate()
+ self.inodes.invalidate_inode(self.collection_record_file)
+ _logger.debug("%s invalidated collection record", self)
+ finally:
+ while lockcount > 0:
+ self.collection.lock.acquire()
+ lockcount -= 1
def collection_record(self):
with llfuse.lock_released:
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
"sort"
"strings"
"sync"
+ "sync/atomic"
"syscall"
"time"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/keepclient"
+ "github.com/jmoiron/sqlx"
"github.com/sirupsen/logrus"
)
// BlobSignatureTTL; and all N existing replicas of a given data block
// are in the N best positions in rendezvous probe order.
type Balancer struct {
+ DB *sqlx.DB
Logger logrus.FieldLogger
Dumper logrus.FieldLogger
Metrics *metrics
classes []string
mounts int
mountsByClass map[string]map[*KeepMount]bool
- collScanned int
+ collScanned int64
serviceRoots map[string]string
errors []error
stats balancerStats
}
if runOptions.CommitTrash {
err = bal.CommitTrash(ctx, client)
+ if err != nil {
+ return
+ }
+ }
+ if runOptions.CommitConfirmedFields {
+ err = bal.updateCollections(ctx, client, cluster)
+ if err != nil {
+ return
+ }
}
return
}
}(mounts)
}
- // collQ buffers incoming collections so we can start fetching
- // the next page without waiting for the current page to
- // finish processing.
collQ := make(chan arvados.Collection, bufs)
- // Start a goroutine to process collections. (We could use a
- // worker pool here, but even with a single worker we already
- // process collections much faster than we can retrieve them.)
+ // Retrieve all collections from the database and send them to
+ // collQ.
wg.Add(1)
go func() {
defer wg.Done()
- for coll := range collQ {
- err := bal.addCollection(coll)
- if err != nil || len(errs) > 0 {
- select {
- case errs <- err:
- default:
- }
- for range collQ {
- }
- cancel()
- return
- }
- bal.collScanned++
- }
- }()
-
- // Start a goroutine to retrieve all collections from the
- // Arvados database and send them to collQ for processing.
- wg.Add(1)
- go func() {
- defer wg.Done()
- err = EachCollection(ctx, c, pageSize,
+ err = EachCollection(ctx, bal.DB, c,
func(coll arvados.Collection) error {
collQ <- coll
if len(errs) > 0 {
}
}()
+ // Parse manifests from collQ and pass the block hashes to
+ // BlockStateMap to track desired replication.
+ for i := 0; i < runtime.NumCPU(); i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ for coll := range collQ {
+ err := bal.addCollection(coll)
+ if err != nil || len(errs) > 0 {
+ select {
+ case errs <- err:
+ default:
+ }
+ cancel()
+ continue
+ }
+ atomic.AddInt64(&bal.collScanned, 1)
+ }
+ }()
+ }
+
wg.Wait()
if len(errs) > 0 {
return <-errs
if coll.ReplicationDesired != nil {
repl = *coll.ReplicationDesired
}
- bal.Logger.Debugf("%v: %d block x%d", coll.UUID, len(blkids), repl)
+ bal.Logger.Debugf("%v: %d blocks x%d", coll.UUID, len(blkids), repl)
// Pass pdh to IncreaseDesired only if LostBlocksFile is being
// written -- otherwise it's just a waste of memory.
pdh := ""
// effectively read-only.
mnt.ReadOnly = mnt.ReadOnly || srv.ReadOnly
- if len(mnt.StorageClasses) == 0 {
- bal.mountsByClass["default"][mnt] = true
- continue
- }
for class := range mnt.StorageClasses {
if mbc := bal.mountsByClass[class]; mbc == nil {
bal.classes = append(bal.classes, class)
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/jmoiron/sqlx"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
check "gopkg.in/check.v1"
var stubMounts = map[string][]arvados.KeepMount{
"keep0.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-000000000000000",
- DeviceID: "keep0-vol0",
+ UUID: "zzzzz-ivpuk-000000000000000",
+ DeviceID: "keep0-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
"keep1.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-100000000000000",
- DeviceID: "keep1-vol0",
+ UUID: "zzzzz-ivpuk-100000000000000",
+ DeviceID: "keep1-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
"keep2.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-200000000000000",
- DeviceID: "keep2-vol0",
+ UUID: "zzzzz-ivpuk-200000000000000",
+ DeviceID: "keep2-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
"keep3.zzzzz.arvadosapi.com:25107": {{
- UUID: "zzzzz-ivpuk-300000000000000",
- DeviceID: "keep3-vol0",
+ UUID: "zzzzz-ivpuk-300000000000000",
+ DeviceID: "keep3-vol0",
+ StorageClasses: map[string]bool{"default": true},
}},
}
type runSuite struct {
stub stubServer
config *arvados.Cluster
+ db *sqlx.DB
client *arvados.Client
}
Metrics: newMetrics(prometheus.NewRegistry()),
Logger: options.Logger,
Dumper: options.Dumper,
+ DB: s.db,
}
return srv
}
c.Assert(err, check.Equals, nil)
s.config, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
+ s.db, err = sqlx.Open("postgres", s.config.PostgreSQL.Connection.String())
+ c.Assert(err, check.IsNil)
s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
arvadostest.SetServiceURL(&s.config.Services.Keepbalance, "http://localhost:/")
}
func (s *runSuite) TestRefuseZeroCollections(c *check.C) {
+ defer arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
+ _, err := s.db.Exec(`delete from collections`)
+ c.Assert(err, check.IsNil)
opts := RunOptions{
CommitPulls: true,
CommitTrash: true,
trashReqs := s.stub.serveKeepstoreTrash()
pullReqs := s.stub.serveKeepstorePull()
srv := s.newServer(&opts)
- _, err := srv.runOnce()
+ _, err = srv.runOnce()
c.Check(err, check.ErrorMatches, "received zero collections")
c.Check(trashReqs.Count(), check.Equals, 4)
c.Check(pullReqs.Count(), check.Equals, 0)
c.Check(pullReqs.Count(), check.Equals, 0)
}
-func (s *runSuite) TestDetectSkippedCollections(c *check.C) {
- opts := RunOptions{
- CommitPulls: true,
- CommitTrash: true,
- Logger: ctxlog.TestLogger(c),
- }
- s.stub.serveCurrentUserAdmin()
- s.stub.serveCollectionsButSkipOne()
- s.stub.serveKeepServices(stubServices)
- s.stub.serveKeepstoreMounts()
- s.stub.serveKeepstoreIndexFoo4Bar1()
- trashReqs := s.stub.serveKeepstoreTrash()
- pullReqs := s.stub.serveKeepstorePull()
- srv := s.newServer(&opts)
- _, err := srv.runOnce()
- c.Check(err, check.ErrorMatches, `Retrieved 2 collections with modtime <= .* but server now reports there are 3 collections.*`)
- c.Check(trashReqs.Count(), check.Equals, 4)
- c.Check(pullReqs.Count(), check.Equals, 0)
-}
-
func (s *runSuite) TestWriteLostBlocks(c *check.C) {
lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
c.Assert(err, check.IsNil)
c.Check(err, check.IsNil)
lost, err := ioutil.ReadFile(lostf.Name())
c.Assert(err, check.IsNil)
- c.Check(string(lost), check.Equals, "37b51d194a7513e45b56f6524f2d51f2 fa7aeb5140e2848d39b416daeef4ffc5+45\n")
+ c.Check(string(lost), check.Matches, `(?ms).*37b51d194a7513e45b56f6524f2d51f2.* fa7aeb5140e2848d39b416daeef4ffc5\+45.*`)
}
func (s *runSuite) TestDryRun(c *check.C) {
}
func (s *runSuite) TestCommit(c *check.C) {
- lostf, err := ioutil.TempFile("", "keep-balance-lost-blocks-test-")
- c.Assert(err, check.IsNil)
- s.config.Collections.BlobMissingReport = lostf.Name()
- defer os.Remove(lostf.Name())
-
+ s.config.Collections.BlobMissingReport = c.MkDir() + "/keep-balance-lost-blocks-test-"
s.config.ManagementToken = "xyzzy"
opts := RunOptions{
CommitPulls: true,
// in a poor rendezvous position
c.Check(bal.stats.pulls, check.Equals, 2)
- lost, err := ioutil.ReadFile(lostf.Name())
+ lost, err := ioutil.ReadFile(s.config.Collections.BlobMissingReport)
c.Assert(err, check.IsNil)
- c.Check(string(lost), check.Equals, "")
+ c.Check(string(lost), check.Not(check.Matches), `(?ms).*acbd18db4cc2f85cedef654fccc4a4d8.*`)
buf, err := s.getMetrics(c, srv)
c.Check(err, check.IsNil)
- c.Check(buf, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
- c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
- c.Check(buf, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
- c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio 1\.5\n.*`)
- c.Check(buf, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio 1\.5\n.*`)
+ bufstr := buf.String()
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_total_bytes 15\n.*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_sum [0-9\.]+\n.*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keepbalance_changeset_compute_seconds_count 1\n.*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_dedup_byte_ratio [1-9].*`)
+ c.Check(bufstr, check.Matches, `(?ms).*\narvados_keep_dedup_block_ratio [1-9].*`)
}
func (s *runSuite) TestRunForever(c *check.C) {
}
srv.mounts = []*KeepMount{{
KeepMount: arvados.KeepMount{
- UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
+ UUID: fmt.Sprintf("zzzzz-mount-%015x", i),
+ StorageClasses: map[string]bool{"default": true},
},
KeepService: srv,
}}
srv.mounts[0].KeepMount.DeviceID = fmt.Sprintf("writable-by-srv-%x", i)
srv.mounts = append(srv.mounts, &KeepMount{
KeepMount: arvados.KeepMount{
- DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
- UUID: fmt.Sprintf("zzzzz-mount-%015x", i<<16),
- ReadOnly: readonly,
- Replication: 1,
+ DeviceID: fmt.Sprintf("writable-by-srv-%x", (i+1)%len(bal.srvs)),
+ UUID: fmt.Sprintf("zzzzz-mount-%015x", i<<16),
+ ReadOnly: readonly,
+ Replication: 1,
+ StorageClasses: map[string]bool{"default": true},
},
KeepService: srv,
})
bsm.get(blkid).increaseDesired(pdh, classes, n)
}
}
+
+// GetConfirmedReplication returns the replication level of the given
+// blocks, considering only the specified storage classes.
+//
+// If len(classes)==0, returns the replication level without regard to
+// storage classes.
+//
+// Safe to call concurrently with other calls to GetCurrent, but not
+// with different BlockStateMap methods.
+func (bsm *BlockStateMap) GetConfirmedReplication(blkids []arvados.SizedDigest, classes []string) int {
+ defaultClasses := map[string]bool{"default": true}
+ min := 0
+ for _, blkid := range blkids {
+ total := 0
+ perclass := make(map[string]int, len(classes))
+ for _, c := range classes {
+ perclass[c] = 0
+ }
+ for _, r := range bsm.get(blkid).Replicas {
+ total += r.KeepMount.Replication
+ mntclasses := r.KeepMount.StorageClasses
+ if len(mntclasses) == 0 {
+ mntclasses = defaultClasses
+ }
+ for c := range mntclasses {
+ n, ok := perclass[c]
+ if !ok {
+ // Don't care about this storage class
+ continue
+ }
+ perclass[c] = n + r.KeepMount.Replication
+ }
+ }
+ if total == 0 {
+ return 0
+ }
+ for _, n := range perclass {
+ if n == 0 {
+ return 0
+ }
+ if n < min || min == 0 {
+ min = n
+ }
+ }
+ if len(perclass) == 0 && (min == 0 || min > total) {
+ min = total
+ }
+ }
+ return min
+}
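// Illustrative arithmetic for GetConfirmedReplication (not from the
// patch): if block A has replicas on a Replication=1 mount and a
// Replication=2 mount that both offer class "default", while block B
// is only on the Replication=2 mount, then for classes=["default"]
// block A sums to 3 and block B to 2, and the function returns the
// minimum, 2. A collection's confirmed replication is thus bounded by
// its least-replicated block.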
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+ "time"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&confirmedReplicationSuite{})
+
+type confirmedReplicationSuite struct {
+ blockStateMap *BlockStateMap
+ mtime int64
+}
+
+func (s *confirmedReplicationSuite) SetUpTest(c *check.C) {
+ t, _ := time.Parse(time.RFC3339Nano, time.RFC3339Nano)
+ s.mtime = t.UnixNano()
+ s.blockStateMap = NewBlockStateMap()
+ s.blockStateMap.AddReplicas(&KeepMount{KeepMount: arvados.KeepMount{
+ Replication: 1,
+ StorageClasses: map[string]bool{"default": true},
+ }}, []arvados.KeepServiceIndexEntry{
+ {SizedDigest: knownBlkid(10), Mtime: s.mtime},
+ })
+ s.blockStateMap.AddReplicas(&KeepMount{KeepMount: arvados.KeepMount{
+ Replication: 2,
+ StorageClasses: map[string]bool{"default": true},
+ }}, []arvados.KeepServiceIndexEntry{
+ {SizedDigest: knownBlkid(20), Mtime: s.mtime},
+ })
+}
+
+func (s *confirmedReplicationSuite) TestZeroReplication(c *check.C) {
+ n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(404), knownBlkid(409)}, []string{"default"})
+ c.Check(n, check.Equals, 0)
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(10), knownBlkid(404)}, []string{"default"})
+ c.Check(n, check.Equals, 0)
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(10), knownBlkid(404)}, nil)
+ c.Check(n, check.Equals, 0)
+}
+
+func (s *confirmedReplicationSuite) TestBlocksWithDifferentReplication(c *check.C) {
+ n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(10), knownBlkid(20)}, []string{"default"})
+ c.Check(n, check.Equals, 1)
+}
+
+func (s *confirmedReplicationSuite) TestBlocksInDifferentClasses(c *check.C) {
+ s.blockStateMap.AddReplicas(&KeepMount{KeepMount: arvados.KeepMount{
+ Replication: 3,
+ StorageClasses: map[string]bool{"three": true},
+ }}, []arvados.KeepServiceIndexEntry{
+ {SizedDigest: knownBlkid(30), Mtime: s.mtime},
+ })
+
+ n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(30)}, []string{"three"})
+ c.Check(n, check.Equals, 3)
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(20), knownBlkid(30)}, []string{"default"})
+ c.Check(n, check.Equals, 0) // block 30 has repl 0 @ "default"
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(20), knownBlkid(30)}, []string{"three"})
+ c.Check(n, check.Equals, 0) // block 20 has repl 0 @ "three"
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(20), knownBlkid(30)}, nil)
+ c.Check(n, check.Equals, 2)
+}
+
+func (s *confirmedReplicationSuite) TestBlocksOnMultipleMounts(c *check.C) {
+ s.blockStateMap.AddReplicas(&KeepMount{KeepMount: arvados.KeepMount{
+ Replication: 2,
+ StorageClasses: map[string]bool{"default": true, "four": true},
+ }}, []arvados.KeepServiceIndexEntry{
+ {SizedDigest: knownBlkid(40), Mtime: s.mtime},
+ {SizedDigest: knownBlkid(41), Mtime: s.mtime},
+ })
+ s.blockStateMap.AddReplicas(&KeepMount{KeepMount: arvados.KeepMount{
+ Replication: 2,
+ StorageClasses: map[string]bool{"four": true},
+ }}, []arvados.KeepServiceIndexEntry{
+ {SizedDigest: knownBlkid(40), Mtime: s.mtime},
+ {SizedDigest: knownBlkid(41), Mtime: s.mtime},
+ })
+ n := s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(40), knownBlkid(41)}, []string{"default"})
+ c.Check(n, check.Equals, 2)
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(40), knownBlkid(41)}, []string{"four"})
+ c.Check(n, check.Equals, 4)
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(40), knownBlkid(41)}, []string{"default", "four"})
+ c.Check(n, check.Equals, 2)
+ n = s.blockStateMap.GetConfirmedReplication([]arvados.SizedDigest{knownBlkid(40), knownBlkid(41)}, nil)
+ c.Check(n, check.Equals, 4)
+}
import (
"context"
+ "encoding/json"
"fmt"
+ "runtime"
+ "sync"
+ "sync/atomic"
"time"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/jmoiron/sqlx"
)
func countCollections(c *arvados.Client, params arvados.ResourceListParams) (int, error) {
// The progress function is called periodically with done (number of
// times f has been called) and total (number of times f is expected
// to be called).
-//
-// If pageSize > 0 it is used as the maximum page size in each API
-// call; otherwise the maximum allowed page size is requested.
-func EachCollection(ctx context.Context, c *arvados.Client, pageSize int, f func(arvados.Collection) error, progress func(done, total int)) error {
+func EachCollection(ctx context.Context, db *sqlx.DB, c *arvados.Client, f func(arvados.Collection) error, progress func(done, total int)) error {
if progress == nil {
progress = func(_, _ int) {}
}
if err != nil {
return err
}
+ var newestModifiedAt time.Time
- // Note the obvious way to get all collections (sorting by
- // UUID) would be much easier, but would lose data: If a
- // client were to move files from collection with uuid="zzz"
- // to a collection with uuid="aaa" around the time when we
- // were fetching the "mmm" page, we would never see those
- // files' block IDs at all -- even if the client is careful to
- // save "aaa" before saving "zzz".
- //
- // Instead, we get pages in modified_at order. Collections
- // that are modified during the run will be re-fetched in a
- // subsequent page.
-
- limit := pageSize
- if limit <= 0 {
- // Use the maximum page size the server allows
- limit = 1<<31 - 1
- }
- params := arvados.ResourceListParams{
- Limit: &limit,
- Order: "modified_at, uuid",
- Count: "none",
- Select: []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
- IncludeTrash: true,
- IncludeOldVersions: true,
+ rows, err := db.QueryxContext(ctx, `SELECT
+ uuid, manifest_text, modified_at, portable_data_hash,
+ replication_desired, replication_confirmed, replication_confirmed_at,
+ storage_classes_desired, storage_classes_confirmed, storage_classes_confirmed_at,
+ is_trashed
+ FROM collections`)
+ if err != nil {
+ return err
}
- var last arvados.Collection
- var filterTime time.Time
+ defer rows.Close()
+ progressTicker := time.NewTicker(10 * time.Second)
+ defer progressTicker.Stop()
callCount := 0
- gettingExactTimestamp := false
- for {
- progress(callCount, expectCount)
- var page arvados.CollectionList
- err := c.RequestAndDecodeContext(ctx, &page, "GET", "arvados/v1/collections", nil, params)
+ for rows.Next() {
+ var coll arvados.Collection
+ var classesDesired, classesConfirmed []byte
+ err = rows.Scan(&coll.UUID, &coll.ManifestText, &coll.ModifiedAt, &coll.PortableDataHash,
+ &coll.ReplicationDesired, &coll.ReplicationConfirmed, &coll.ReplicationConfirmedAt,
+ &classesDesired, &classesConfirmed, &coll.StorageClassesConfirmedAt,
+ &coll.IsTrashed)
if err != nil {
return err
}
- for _, coll := range page.Items {
- if last.ModifiedAt == coll.ModifiedAt && last.UUID >= coll.UUID {
- continue
- }
- callCount++
- err = f(coll)
- if err != nil {
- return err
- }
- last = coll
+
+ err = json.Unmarshal(classesDesired, &coll.StorageClassesDesired)
+ if err != nil && len(classesDesired) > 0 {
+ return err
}
- if len(page.Items) == 0 && !gettingExactTimestamp {
- break
- } else if last.ModifiedAt.IsZero() {
- return fmt.Errorf("BUG: Last collection on the page (%s) has no modified_at timestamp; cannot make progress", last.UUID)
- } else if len(page.Items) > 0 && last.ModifiedAt == filterTime {
- // If we requested time>=X and never got a
- // time>X then we might not have received all
- // items with time==X yet. Switch to
- // gettingExactTimestamp mode (if we're not
- // there already), advancing our UUID
- // threshold with each request, until we get
- // an empty page.
- gettingExactTimestamp = true
- params.Filters = []arvados.Filter{{
- Attr: "modified_at",
- Operator: "=",
- Operand: filterTime,
- }, {
- Attr: "uuid",
- Operator: ">",
- Operand: last.UUID,
- }}
- } else if gettingExactTimestamp {
- // This must be an empty page (in this mode,
- // an unequal timestamp is impossible) so we
- // can start getting pages of newer
- // collections.
- gettingExactTimestamp = false
- params.Filters = []arvados.Filter{{
- Attr: "modified_at",
- Operator: ">",
- Operand: filterTime,
- }}
- } else {
- // In the normal case, we know we have seen
- // all collections with modtime<filterTime,
- // but we might not have seen all that have
- // modtime=filterTime. Hence we use >= instead
- // of > and skip the obvious overlapping item,
- // i.e., the last item on the previous
- // page. In some edge cases this can return
- // collections we have already seen, but
- // avoiding that would add overhead in the
- // overwhelmingly common cases, so we don't
- // bother.
- filterTime = last.ModifiedAt
- params.Filters = []arvados.Filter{{
- Attr: "modified_at",
- Operator: ">=",
- Operand: filterTime,
- }, {
- Attr: "uuid",
- Operator: "!=",
- Operand: last.UUID,
- }}
+ err = json.Unmarshal(classesConfirmed, &coll.StorageClassesConfirmed)
+ if err != nil && len(classesConfirmed) > 0 {
+ return err
+ }
+ if newestModifiedAt.IsZero() || newestModifiedAt.Before(coll.ModifiedAt) {
+ newestModifiedAt = coll.ModifiedAt
+ }
+ callCount++
+ err = f(coll)
+ if err != nil {
+ return err
+ }
+ select {
+ case <-progressTicker.C:
+ progress(callCount, expectCount)
+ default:
}
}
progress(callCount, expectCount)
-
+ err = rows.Close()
+ if err != nil {
+ return err
+ }
if checkCount, err := countCollections(c, arvados.ResourceListParams{
Filters: []arvados.Filter{{
Attr: "modified_at",
Operator: "<=",
- Operand: filterTime}},
+ Operand: newestModifiedAt}},
IncludeTrash: true,
IncludeOldVersions: true,
}); err != nil {
return err
} else if callCount < checkCount {
- return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, filterTime, checkCount)
+ return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, newestModifiedAt, checkCount)
}
return nil
}
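// Rationale for the countCollections check above, roughly: any
// collection whose modified_at is still <= newestModifiedAt should
// have been visible to the table scan, so if the API now reports more
// such collections than f was called for, some rows were missed and
// the caller must not treat this run as complete.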
+
+func (bal *Balancer) updateCollections(ctx context.Context, c *arvados.Client, cluster *arvados.Cluster) error {
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
+
+ defer bal.time("update_collections", "wall clock time to update collections")()
+ threshold := time.Now()
+ thresholdStr := threshold.Format(time.RFC3339Nano)
+
+ updated := int64(0)
+
+ errs := make(chan error, 1)
+ collQ := make(chan arvados.Collection, cluster.Collections.BalanceCollectionBuffers)
+ go func() {
+ defer close(collQ)
+ err := EachCollection(ctx, bal.DB, c, func(coll arvados.Collection) error {
+ if atomic.LoadInt64(&updated) >= int64(cluster.Collections.BalanceUpdateLimit) {
+ bal.logf("reached BalanceUpdateLimit (%d)", cluster.Collections.BalanceUpdateLimit)
+ cancel()
+ return context.Canceled
+ }
+ collQ <- coll
+ return nil
+ }, func(done, total int) {
+ bal.logf("update collections: %d/%d (%d updated @ %.01f updates/s)", done, total, atomic.LoadInt64(&updated), float64(atomic.LoadInt64(&updated))/time.Since(threshold).Seconds())
+ })
+ if err != nil && err != context.Canceled {
+ select {
+ case errs <- err:
+ default:
+ }
+ }
+ }()
+
+ var wg sync.WaitGroup
+
+ // Use about 1 goroutine per 2 CPUs. Based on experiments with
+ // a 2-core host, using more concurrent database
+ // calls/transactions makes this process slower, not faster.
+ for i := 0; i < (runtime.NumCPU()+1)/2; i++ {
+ wg.Add(1)
+ goSendErr(errs, func() error {
+ defer wg.Done()
+ tx, err := bal.DB.Beginx()
+ if err != nil {
+ return err
+ }
+ txPending := 0
+ flush := func(final bool) error {
+ err := tx.Commit()
+ if err != nil && ctx.Err() == nil {
+ tx.Rollback()
+ return err
+ }
+ txPending = 0
+ if final {
+ return nil
+ }
+ tx, err = bal.DB.Beginx()
+ return err
+ }
+ txBatch := 100
+ for coll := range collQ {
+ if ctx.Err() != nil || len(errs) > 0 {
+ continue
+ }
+ blkids, err := coll.SizedDigests()
+ if err != nil {
+ bal.logf("%s: %s", coll.UUID, err)
+ continue
+ }
+ repl := bal.BlockStateMap.GetConfirmedReplication(blkids, coll.StorageClassesDesired)
+
+ desired := bal.DefaultReplication
+ if coll.ReplicationDesired != nil {
+ desired = *coll.ReplicationDesired
+ }
+ if repl > desired {
+ // If actual>desired, confirm
+ // the desired number rather
+ // than actual to avoid
+ // flapping updates when
+ // replication increases
+ // temporarily.
+ repl = desired
+ }
+ classes := emptyJSONArray
+ if repl > 0 {
+ classes, err = json.Marshal(coll.StorageClassesDesired)
+ if err != nil {
+ bal.logf("BUG? json.Marshal(%v) failed: %s", coll.StorageClassesDesired, err)
+ continue
+ }
+ }
+ needUpdate := coll.ReplicationConfirmed == nil || *coll.ReplicationConfirmed != repl || len(coll.StorageClassesConfirmed) != len(coll.StorageClassesDesired)
+ for i := range coll.StorageClassesDesired {
+ if !needUpdate && coll.StorageClassesDesired[i] != coll.StorageClassesConfirmed[i] {
+ needUpdate = true
+ }
+ }
+ if !needUpdate {
+ continue
+ }
+ _, err = tx.ExecContext(ctx, `update collections set
+ replication_confirmed=$1,
+ replication_confirmed_at=$2,
+ storage_classes_confirmed=$3,
+ storage_classes_confirmed_at=$2
+ where uuid=$4`,
+ repl, thresholdStr, classes, coll.UUID)
+ if err != nil {
+ if ctx.Err() == nil {
+ bal.logf("%s: update failed: %s", coll.UUID, err)
+ }
+ continue
+ }
+ atomic.AddInt64(&updated, 1)
+ if txPending++; txPending >= txBatch {
+ err = flush(false)
+ if err != nil {
+ return err
+ }
+ }
+ }
+ return flush(true)
+ })
+ }
+ wg.Wait()
+ bal.logf("updated %d collections", updated)
+ if len(errs) > 0 {
+ return fmt.Errorf("error updating collections: %s", <-errs)
+ }
+ return nil
+}
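// Note on write volume: each worker commits after txBatch (100) row
// updates, the run stops once BalanceUpdateLimit collections have been
// updated, and collections whose confirmed fields already match the
// computed values are skipped entirely, so a steady-state cluster sees
// few UPDATEs per balancing run.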
+
+// Call f in a new goroutine. If it returns a non-nil error, send the
+// error to the errs channel (unless the channel is already full with
+// another error).
+func goSendErr(errs chan<- error, f func() error) {
+ go func() {
+ err := f()
+ if err != nil {
+ select {
+ case errs <- err:
+ default:
+ }
+ }
+ }()
+}
+
+var emptyJSONArray = []byte("[]")
import (
"context"
- "sync"
- "time"
+ "git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/jmoiron/sqlx"
check "gopkg.in/check.v1"
)
-// TestIdenticalTimestamps ensures EachCollection returns the same
-// set of collections for various page sizes -- even page sizes so
-// small that we get entire pages full of collections with identical
-// timestamps and exercise our gettingExactTimestamp cases.
-func (s *integrationSuite) TestIdenticalTimestamps(c *check.C) {
- // pageSize==0 uses the default (large) page size.
- pageSizes := []int{0, 2, 3, 4, 5}
- got := make([][]string, len(pageSizes))
- var wg sync.WaitGroup
- for trial, pageSize := range pageSizes {
- wg.Add(1)
- go func(trial, pageSize int) {
- defer wg.Done()
- streak := 0
- longestStreak := 0
- var lastMod time.Time
- sawUUID := make(map[string]bool)
- err := EachCollection(context.Background(), s.client, pageSize, func(c arvados.Collection) error {
- if c.ModifiedAt.IsZero() {
- return nil
- }
- if sawUUID[c.UUID] {
- // dup
- return nil
- }
- got[trial] = append(got[trial], c.UUID)
- sawUUID[c.UUID] = true
- if lastMod == c.ModifiedAt {
- streak++
- if streak > longestStreak {
- longestStreak = streak
- }
- } else {
- streak = 0
- lastMod = c.ModifiedAt
- }
- return nil
- }, nil)
- c.Check(err, check.IsNil)
- c.Check(longestStreak > 25, check.Equals, true)
- }(trial, pageSize)
- }
- wg.Wait()
- for trial := 1; trial < len(pageSizes); trial++ {
- c.Check(got[trial], check.DeepEquals, got[0])
- }
+// TestMissedCollections exercises EachCollection's sanity check:
+// #collections processed >= #old collections that exist in the database
+// after processing.
+func (s *integrationSuite) TestMissedCollections(c *check.C) {
+ cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+ c.Assert(err, check.IsNil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+ c.Assert(err, check.IsNil)
+
+ defer db.Exec(`delete from collections where uuid = 'zzzzz-4zz18-404040404040404'`)
+ insertedOld := false
+ err = EachCollection(context.Background(), db, s.client, func(coll arvados.Collection) error {
+ if !insertedOld {
+ insertedOld = true
+ _, err := db.Exec(`insert into collections (uuid, created_at, updated_at, modified_at) values ('zzzzz-4zz18-404040404040404', '2002-02-02T02:02:02Z', '2002-02-02T02:02:02Z', '2002-02-02T02:02:02Z')`)
+ return err
+ }
+ return nil
+ }, nil)
+ c.Check(err, check.ErrorMatches, `Retrieved .* collections .* but server now reports .* collections.*`)
}
import (
"bytes"
+ "io"
"os"
"strings"
"testing"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/keepclient"
+ "github.com/jmoiron/sqlx"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
check "gopkg.in/check.v1"
type integrationSuite struct {
config *arvados.Cluster
+ db *sqlx.DB
client *arvados.Client
keepClient *keepclient.KeepClient
}
c.Assert(err, check.Equals, nil)
s.config, err = cfg.GetCluster("")
c.Assert(err, check.Equals, nil)
+ s.db, err = sqlx.Open("postgres", s.config.PostgreSQL.Connection.String())
+ c.Assert(err, check.IsNil)
s.config.Collections.BalancePeriod = arvados.Duration(time.Second)
s.client = &arvados.Client{
for iter := 0; iter < 20; iter++ {
logBuf.Reset()
logger := logrus.New()
- logger.Out = &logBuf
+ logger.Out = io.MultiWriter(&logBuf, os.Stderr)
opts := RunOptions{
- CommitPulls: true,
- CommitTrash: true,
- Logger: logger,
+ CommitPulls: true,
+ CommitTrash: true,
+ CommitConfirmedFields: true,
+ Logger: logger,
}
bal := &Balancer{
+ DB: s.db,
Logger: logger,
Metrics: newMetrics(prometheus.NewRegistry()),
}
time.Sleep(200 * time.Millisecond)
}
c.Check(logBuf.String(), check.Not(check.Matches), `(?ms).*0 replicas (0 blocks, 0 bytes) underreplicated.*`)
+
+ for _, trial := range []struct {
+ uuid string
+ repl int
+ classes []string
+ }{
+ {arvadostest.EmptyCollectionUUID, 0, []string{}},
+ {arvadostest.FooCollection, 2, []string{"default"}}, // "foo" blk
+ {arvadostest.StorageClassesDesiredDefaultConfirmedDefault, 2, []string{"default"}}, // "bar" blk
+ {arvadostest.StorageClassesDesiredArchiveConfirmedDefault, 0, []string{}}, // "bar" blk
+ } {
+ c.Logf("%#v", trial)
+ var coll arvados.Collection
+ s.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+trial.uuid, nil, nil)
+ if c.Check(coll.ReplicationConfirmed, check.NotNil) {
+ c.Check(*coll.ReplicationConfirmed, check.Equals, trial.repl)
+ }
+ c.Check(coll.StorageClassesConfirmed, check.DeepEquals, trial.classes)
+ }
}
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
"flag"
"fmt"
"io"
+ "net/http"
+ _ "net/http/pprof"
"os"
"git.arvados.org/arvados.git/lib/config"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/health"
+ "github.com/jmoiron/sqlx"
+ _ "github.com/lib/pq"
"github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
)
"send pull requests (make more replicas of blocks that are underreplicated or are not in optimal rendezvous probe order)")
flags.BoolVar(&options.CommitTrash, "commit-trash", false,
"send trash requests (delete unreferenced old blocks, and excess replicas of overreplicated blocks)")
+ flags.BoolVar(&options.CommitConfirmedFields, "commit-confirmed-fields", true,
+ "update collection fields (replicas_confirmed, storage_classes_confirmed, etc.)")
flags.Bool("version", false, "Write version information to stdout and exit 0")
dumpFlag := flags.Bool("dump", false, "dump details for each block to stdout")
+ pprofAddr := flags.String("pprof", "", "serve Go profile data at `[addr]:port`")
+
+ if *pprofAddr != "" {
+ go func() {
+ logrus.Println(http.ListenAndServe(*pprofAddr, nil))
+ }()
+ }
loader := config.NewLoader(os.Stdin, logger)
loader.SetupFlags(flags)
// service.Command
args = nil
dropFlag := map[string]bool{
- "once": true,
- "commit-pulls": true,
- "commit-trash": true,
- "dump": true,
+ "once": true,
+ "commit-pulls": true,
+ "commit-trash": true,
+ "commit-confirmed-fields": true,
+ "dump": true,
}
flags.Visit(func(f *flag.Flag) {
if !dropFlag[f.Name] {
return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
}
+ db, err := sqlx.Open("postgres", cluster.PostgreSQL.Connection.String())
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection failed: %s", err))
+ }
+ if p := cluster.PostgreSQL.ConnectionPool; p > 0 {
+ db.SetMaxOpenConns(p)
+ }
+ err = db.Ping()
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("postgresql connection succeeded but ping failed: %s", err))
+ }
+
if options.Logger == nil {
options.Logger = ctxlog.FromContext(ctx)
}
Metrics: newMetrics(registry),
Logger: options.Logger,
Dumper: options.Dumper,
+ DB: db,
}
srv.Handler = &health.Handler{
Token: cluster.ManagementToken,
"net/http"
"time"
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/ghodss/yaml"
check "gopkg.in/check.v1"
)
}
func (s *mainSuite) TestHTTPServer(c *check.C) {
+ arvadostest.StartKeep(2, true)
+
ln, err := net.Listen("tcp", ":0")
if err != nil {
c.Fatal(err)
_, p, err := net.SplitHostPort(ln.Addr().String())
c.Check(err, check.IsNil)
ln.Close()
- config := "Clusters:\n zzzzz:\n ManagementToken: abcdefg\n Services: {Keepbalance: {InternalURLs: {'http://localhost:" + p + "/': {}}}}\n"
+ cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+ c.Assert(err, check.IsNil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ cluster.Services.Keepbalance.InternalURLs[arvados.URL{Host: "localhost:" + p, Path: "/"}] = arvados.ServiceInstance{}
+ cfg.Clusters[cluster.ClusterID] = *cluster
+ config, err := yaml.Marshal(cfg)
+ c.Assert(err, check.IsNil)
var stdout bytes.Buffer
- go runCommand("keep-balance", []string{"-config", "-"}, bytes.NewBufferString(config), &stdout, &stdout)
+ go runCommand("keep-balance", []string{"-config", "-"}, bytes.NewBuffer(config), &stdout, &stdout)
done := make(chan struct{})
go func() {
defer close(done)
c.Fatal(err)
return
}
- req.Header.Set("Authorization", "Bearer abcdefg")
+ req.Header.Set("Authorization", "Bearer "+cluster.ManagementToken)
resp, err := http.DefaultClient.Do(req)
if err != nil {
c.Logf("error %s", err)
c.Log(stdout.String())
c.Fatal("timeout")
}
+ c.Log(stdout.String())
// Check non-metrics URL that gets passed through to us from
// service.Command
"time"
"git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/jmoiron/sqlx"
"github.com/sirupsen/logrus"
)
//
// RunOptions fields are controlled by command line flags.
type RunOptions struct {
- Once bool
- CommitPulls bool
- CommitTrash bool
- Logger logrus.FieldLogger
- Dumper logrus.FieldLogger
+ Once bool
+ CommitPulls bool
+ CommitTrash bool
+ CommitConfirmedFields bool
+ Logger logrus.FieldLogger
+ Dumper logrus.FieldLogger
// SafeRendezvousState from the most recent balance operation,
// or "" if unknown. If this changes from one run to the next,
Logger logrus.FieldLogger
Dumper logrus.FieldLogger
+
+ DB *sqlx.DB
}
// CheckHealth implements service.Handler.
func (srv *Server) CheckHealth() error {
- return nil
+ return srv.DB.Ping()
}
// Done implements service.Handler.
func (srv *Server) runOnce() (*Balancer, error) {
bal := &Balancer{
+ DB: srv.DB,
Logger: srv.Logger,
Dumper: srv.Dumper,
Metrics: srv.Metrics,
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
import (
"bytes"
"crypto/md5"
- "errors"
"fmt"
"io/ioutil"
"math/rand"
content := []byte("TestDesiredReplicas")
hash := fmt.Sprintf("%x", md5.Sum(content))
- for _, kc.Want_replicas = range []int{0, 1, 2} {
+ for _, kc.Want_replicas = range []int{0, 1, 2, 3} {
locator, rep, err := kc.PutB(content)
- c.Check(err, Equals, nil)
- c.Check(rep, Equals, kc.Want_replicas)
- if rep > 0 {
- c.Check(locator, Matches, fmt.Sprintf(`^%s\+%d(\+.+)?$`, hash, len(content)))
+ if kc.Want_replicas < 3 {
+ c.Check(err, Equals, nil)
+ c.Check(rep, Equals, kc.Want_replicas)
+ if rep > 0 {
+ c.Check(locator, Matches, fmt.Sprintf(`^%s\+%d(\+.+)?$`, hash, len(content)))
+ }
+ } else {
+ c.Check(err, ErrorMatches, ".*503.*")
}
}
}
hash2, rep, err := kc.PutB([]byte("bar"))
c.Check(hash2, Equals, "")
c.Check(rep, Equals, 0)
- c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError{})
blocklen, _, err := kc.Ask(hash)
c.Check(err, FitsTypeOf, &keepclient.ErrNotFound{})
} else {
c.Check(hash2, Equals, "")
c.Check(rep, Equals, 0)
- c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError{})
}
logbuf.Reset()
}
"net/http"
"net/http/httptest"
"os"
- "regexp"
"sort"
"strings"
"time"
}
}
+func (s *HandlerSuite) TestReadsOrderedByStorageClassPriority(c *check.C) {
+ s.cluster.Volumes = map[string]arvados.Volume{
+ "zzzzz-nyw5e-111111111111111": {
+ Driver: "mock",
+ Replication: 1,
+ StorageClasses: map[string]bool{"class1": true}},
+ "zzzzz-nyw5e-222222222222222": {
+ Driver: "mock",
+ Replication: 1,
+ StorageClasses: map[string]bool{"class2": true, "class3": true}},
+ }
+
+ for _, trial := range []struct {
+ priority1 int // priority of class1, thus vol1
+ priority2 int // priority of class2
+ priority3 int // priority of class3 (vol2 priority will be max(priority2, priority3))
+ get1 int // expected number of "get" ops on vol1
+ get2 int // expected number of "get" ops on vol2
+ }{
+ {100, 50, 50, 1, 0}, // class1 has higher priority => try vol1 first, no need to try vol2
+ {100, 100, 100, 1, 0}, // same priority, vol1 is first lexicographically => try vol1 first and succeed
+ {66, 99, 33, 1, 1}, // class2 has higher priority => try vol2 first, then try vol1
+ {66, 33, 99, 1, 1}, // class3 has highest priority => vol2 has highest => try vol2 first, then try vol1
+ } {
+ c.Logf("%+v", trial)
+ s.cluster.StorageClasses = map[string]arvados.StorageClassConfig{
+ "class1": {Priority: trial.priority1},
+ "class2": {Priority: trial.priority2},
+ "class3": {Priority: trial.priority3},
+ }
+ c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
+ IssueRequest(s.handler,
+ &RequestTester{
+ method: "PUT",
+ uri: "/" + TestHash,
+ requestBody: TestBlock,
+ storageClasses: "class1",
+ })
+ IssueRequest(s.handler,
+ &RequestTester{
+ method: "GET",
+ uri: "/" + TestHash,
+ })
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-111111111111111"].Volume.(*MockVolume).CallCount("Get"), check.Equals, trial.get1)
+ c.Check(s.handler.volmgr.mountMap["zzzzz-nyw5e-222222222222222"].Volume.(*MockVolume).CallCount("Get"), check.Equals, trial.get2)
+ }
+}
+
// Test TOUCH requests.
func (s *HandlerSuite) TestTouchHandler(c *check.C) {
c.Assert(s.handler.setup(context.Background(), s.cluster, "", prometheus.NewRegistry(), testServiceURL), check.IsNil)
expected := `^` + TestHash + `\+\d+ \d+\n` +
TestHash2 + `\+\d+ \d+\n\n$`
- match, _ := regexp.MatchString(expected, response.Body.String())
- if !match {
- c.Errorf(
- "permissions on, superuser request: expected %s, got:\n%s",
- expected, response.Body.String())
- }
+ c.Check(response.Body.String(), check.Matches, expected, check.Commentf(
+ "permissions on, superuser request"))
// superuser /index/prefix request
// => OK
response)
expected = `^` + TestHash + `\+\d+ \d+\n\n$`
- match, _ = regexp.MatchString(expected, response.Body.String())
- if !match {
- c.Errorf(
- "permissions on, superuser /index/prefix request: expected %s, got:\n%s",
- expected, response.Body.String())
- }
+ c.Check(response.Body.String(), check.Matches, expected, check.Commentf(
+ "permissions on, superuser /index/prefix request"))
// superuser /index/{no-such-prefix} request
// => OK
for i, sc := range wantStorageClasses {
wantStorageClasses[i] = strings.TrimSpace(sc)
}
+ } else {
+ // none specified -- use configured default
+ for class, cfg := range rtr.cluster.StorageClasses {
+ if cfg.Default {
+ wantStorageClasses = append(wantStorageClasses, class)
+ }
+ }
}
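	// Example: with a cluster config containing
	//   StorageClasses: {default: {Default: true}}
	// a PUT request that names no storage classes is handled as if it
	// had asked for class "default" (explicitly requested classes are
	// parsed from the request header earlier in this handler).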
buf, err := getBufferWithContext(ctx, bufs, int(req.ContentLength))
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
"fmt"
"io"
"math/big"
+ "sort"
"sync/atomic"
"time"
vm.writables = append(vm.writables, mnt)
}
}
+ // pri(i): return highest priority of any storage class
+ // offered by vm.readables[i]
+ pri := func(i int) int {
+ any, best := false, 0
+ for class := range vm.readables[i].KeepMount.StorageClasses {
+ if p := cluster.StorageClasses[class].Priority; !any || best < p {
+ best = p
+ any = true
+ }
+ }
+ return best
+ }
+ // sort vm.readables, first by highest priority of any offered
+ // storage class (highest->lowest), then by volume UUID
+ sort.Slice(vm.readables, func(i, j int) bool {
+ if pi, pj := pri(i), pri(j); pi != pj {
+ return pi > pj
+ } else {
+ return vm.readables[i].KeepMount.UUID < vm.readables[j].KeepMount.UUID
+ }
+ })
return vm, nil
}
if existing_groups.index(addgroup).nil?
# User should be in group, but isn't, so add them.
STDERR.puts "Add user #{username} to #{addgroup} group"
- system("adduser", username, addgroup)
+ system("usermod", "-aG", addgroup, username)
end
end
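# Note: usermod -aG / gpasswd -d (from shadow-utils) are used here
# instead of Debian's "adduser USER GROUP" / "deluser USER GROUP"
# forms, presumably so the script also behaves correctly on
# non-Debian-family hosts where adduser/deluser lack that syntax.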
if groups.index(removegroup).nil?
# User is in a group, but shouldn't be, so remove them.
STDERR.puts "Remove user #{username} from #{removegroup} group"
- system("deluser", username, removegroup)
+ system("gpasswd", "-d", username, removegroup)
end
end
ProviderType: t3.small
VCPUs: 2
RAM: 2GiB
- IncludedScratch: 50GB
AddedScratch: 50GB
Price: 0.0208
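# IncludedScratch is dropped here presumably because these t3/c5/m5
# (non-"d") types have no instance-store volume: scratch is provided
# entirely by the EBS-backed AddedScratch, and Arvados counts
# IncludedScratch + AddedScratch as total scratch, so listing both
# would overstate capacity.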
c5large:
ProviderType: c5.large
VCPUs: 2
RAM: 4GiB
- IncludedScratch: 50GB
AddedScratch: 50GB
Price: 0.085
m5large:
ProviderType: m5.large
VCPUs: 2
RAM: 8GiB
- IncludedScratch: 50GB
AddedScratch: 50GB
Price: 0.096
c5xlarge:
ProviderType: c5.xlarge
VCPUs: 4
RAM: 8GiB
- IncludedScratch: 100GB
AddedScratch: 100GB
Price: 0.17
m5xlarge:
ProviderType: m5.xlarge
VCPUs: 4
RAM: 16GiB
- IncludedScratch: 100GB
AddedScratch: 100GB
Price: 0.192
m5xlarge_extradisk:
ProviderType: m5.xlarge
VCPUs: 4
RAM: 16GiB
- IncludedScratch: 400GB
AddedScratch: 400GB
Price: 0.193
c52xlarge:
ProviderType: c5.2xlarge
VCPUs: 8
RAM: 16GiB
- IncludedScratch: 200GB
AddedScratch: 200GB
Price: 0.34
m52xlarge:
ProviderType: m5.2xlarge
VCPUs: 8
RAM: 32GiB
- IncludedScratch: 200GB
AddedScratch: 200GB
Price: 0.384
c54xlarge:
ProviderType: c5.4xlarge
VCPUs: 16
RAM: 32GiB
- IncludedScratch: 400GB
AddedScratch: 400GB
Price: 0.68
m54xlarge:
ProviderType: m5.4xlarge
VCPUs: 16
RAM: 64GiB
- IncludedScratch: 400GB
AddedScratch: 400GB
Price: 0.768