#
# SPDX-License-Identifier: AGPL-3.0
-all: centos7/generated debian10/generated ubuntu1604/generated ubuntu1804/generated ubuntu2004/generated
+all: centos7/generated debian10/generated debian11/generated ubuntu1804/generated ubuntu2004/generated
centos7/generated: common-generated-all
test -d centos7/generated || mkdir centos7/generated
test -d debian10/generated || mkdir debian10/generated
cp -f -rlt debian10/generated common-generated/*
-ubuntu1604/generated: common-generated-all
- test -d ubuntu1604/generated || mkdir ubuntu1604/generated
- cp -f -rlt ubuntu1604/generated common-generated/*
+debian11/generated: common-generated-all
+ test -d debian11/generated || mkdir debian11/generated
+ cp -f -rlt debian11/generated common-generated/*
ubuntu1804/generated: common-generated-all
test -d ubuntu1804/generated || mkdir ubuntu1804/generated
#
# SPDX-License-Identifier: AGPL-3.0
-FROM ubuntu:xenial
+## Don't use debian:11 here, since the word 'bullseye' is used to locate rvm precompiled binaries
+FROM debian:bullseye
MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
ENV DEBIAN_FRONTEND noninteractive
# Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev libgnutls-dev curl git libattr1-dev libfuse-dev libpq-dev unzip tzdata python3-venv python3-dev libpam-dev
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python3 python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev unzip python3-venv python3-dev libpam-dev equivs
# Install virtualenv
RUN /usr/bin/pip3 install 'virtualenv<20'
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
+ echo "gem: --no-document" >> /etc/gemrc && \
/usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19 && \
/usr/local/rvm/bin/rvm-exec default gem install fpm --version 1.10.2
RUN git clone --depth 1 git://git.arvados.org/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
ENV WORKSPACE /arvados
-CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "ubuntu1604"]
+CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "debian11"]
#
# SPDX-License-Identifier: AGPL-3.0
-all: centos7/generated debian10/generated ubuntu1604/generated ubuntu1804/generated ubuntu2004/generated
+all: centos7/generated debian10/generated debian11/generated ubuntu1804/generated ubuntu2004/generated
centos7/generated: common-generated-all
test -d centos7/generated || mkdir centos7/generated
test -d debian10/generated || mkdir debian10/generated
cp -f -rlt debian10/generated common-generated/*
-ubuntu1604/generated: common-generated-all
- test -d ubuntu1604/generated || mkdir ubuntu1604/generated
- cp -f -rlt ubuntu1604/generated common-generated/*
+debian11/generated: common-generated-all
+ test -d debian11/generated || mkdir debian11/generated
+ cp -f -rlt debian11/generated common-generated/*
ubuntu1804/generated: common-generated-all
test -d ubuntu1804/generated || mkdir ubuntu1804/generated
#
# SPDX-License-Identifier: AGPL-3.0
-FROM ubuntu:xenial
+FROM debian:bullseye
MAINTAINER Arvados Package Maintainers <packaging@arvados.org>
ENV DEBIAN_FRONTEND noninteractive
# Install dependencies
RUN apt-get update && \
- apt-get -y install --no-install-recommends curl ca-certificates
+ apt-get -y install --no-install-recommends curl ca-certificates gpg procps gpg-agent
# Install RVM
ADD generated/mpapis.asc /tmp/
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.5 && \
/usr/local/rvm/bin/rvm alias create default ruby-2.5 && \
+ echo "gem: --no-document" >> /etc/gemrc && \
/usr/local/rvm/bin/rvm-exec default gem install bundler --version 2.2.19
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
-RUN echo "deb file:///arvados/packages/ubuntu1604/ /" >>/etc/apt/sources.list
-
-# Add preferences file for the Arvados packages. This pins Arvados
-# packages at priority 501, so that older python dependency versions
-# are preferred in those cases where we need them
-ADD etc-apt-preferences.d-arvados /etc/apt/preferences.d/arvados
+RUN echo "deb file:///arvados/packages/debian11/ /" >>/etc/apt/sources.list
+++ /dev/null
-Package: *
-Pin: release o=Arvados
-Pin-Priority: 501
+++ /dev/null
-deb-common-test-packages.sh
\ No newline at end of file
+++ /dev/null
-deb-common-test-packages.sh
\ No newline at end of file
+++ /dev/null
-deb-common-test-packages.sh
\ No newline at end of file
elif ! [[ "$2" =~ (.*)-(.*) ]]; then
echo >&2 "FATAL: --build-version '$2' does not include an iteration. Try '${2}-1'?"
exit 1
+ elif ! [[ "$2" =~ ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$ ]]; then
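+    # e.g. 2.3.0-1, 2.3.0.4-1, 2.3.0~rc2-1, and 2.3.0~dev20211021-1 are accepted;
+    # 2.3-1 and v2.3.0-1 are not.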
+ echo >&2 "FATAL: --build-version '$2' is invalid, must match pattern ^[0-9]+\.[0-9]+\.[0-9]+(\.[0-9]+|)(~rc[0-9]+|~dev[0-9]+|)-[0-9]+$"
+ exit 1
else
ARVADOS_BUILDING_VERSION="${BASH_REMATCH[1]}"
ARVADOS_BUILDING_ITERATION="${BASH_REMATCH[2]}"
arvados-client
arvados-controller
arvados-dispatch-cloud
+ arvados-dispatch-lsf
arvados-docker-cleaner
arvados-git-httpd
arvados-health
"Arvados cluster controller daemon"
package_go_binary cmd/arvados-server arvados-dispatch-cloud \
"Arvados cluster cloud dispatch"
+package_go_binary cmd/arvados-server arvados-dispatch-lsf \
+ "Dispatch Arvados containers to an LSF cluster"
package_go_binary services/arv-git-httpd arvados-git-httpd \
"Provide authenticated http access to Arvados-hosted git repositories"
package_go_binary services/crunch-dispatch-local crunch-dispatch-local \
elif [[ "$FORMAT" == "deb" ]]; then
declare -A dd
dd[debian10]=buster
- dd[ubuntu1604]=xenial
+ dd[debian11]=bullseye
dd[ubuntu1804]=bionic
dd[ubuntu2004]=focal
D=${dd[$TARGET]}
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+[Unit]
+Description=arvados-dispatch-lsf
+Documentation=https://doc.arvados.org/
+After=network.target
+AssertPathExists=/etc/arvados/config.yml
+
+# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
+StartLimitIntervalSec=0
+
+[Service]
+Type=notify
+EnvironmentFile=-/etc/arvados/environment
+ExecStart=/usr/bin/arvados-dispatch-lsf
+# Set a reasonable default for the open file limit
+LimitNOFILE=65536
+Restart=always
+RestartSec=1
+
+# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
+StartLimitInterval=0
+
+[Install]
+WantedBy=multi-user.target
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
"git.arvados.org/arvados.git/lib/crunchrun"
"git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/lib/install"
+ "git.arvados.org/arvados.git/lib/lsf"
"git.arvados.org/arvados.git/lib/recovercollection"
"git.arvados.org/arvados.git/services/ws"
)
"controller": controller.Command,
"crunch-run": crunchrun.Command,
"dispatch-cloud": dispatchcloud.Command,
+ "dispatch-lsf": lsf.DispatchCommand,
"install": install.Command,
"init": install.InitCommand,
"recover-collection": recovercollection.Command,
- install/crunch2-slurm/configure-slurm.html.textile.liquid
- install/crunch2-slurm/install-compute-node.html.textile.liquid
- install/crunch2-slurm/install-test.html.textile.liquid
+ - Containers API (lsf):
+ - install/crunch2-lsf/install-dispatch.html.textile.liquid
- Additional configuration:
- install/container-shell-access.html.textile.liquid
- External dependencies:
h3. Debian and Ubuntu
-Ubuntu 16.04 (xenial) ships with Ruby 2.3, which is not supported by Arvados. Use "RVM":#rvm to install Ruby 2.5 or later.
-
Debian 10 (buster) and Ubuntu 18.04 (bionic) and later ship with Ruby 2.5, which is supported by Arvados.
<notextile>
When serving files that will render directly in the browser, it is important to properly configure the keep-web service to mitigate cross-site scripting (XSS) attacks. An HTML page can be stored in a collection. If an attacker causes a victim to visit that page through Workbench, the HTML will be rendered by the browser. If all collections are served at the same domain, the browser will consider collections as coming from the same origin, which will grant access to the same browsing data (cookies and local storage). This would enable malicious Javascript on that page to access Arvados on behalf of the victim.
-This can be mitigated by having separate domains for each collection, or limiting preview to circumstances where the collection is not accessed with the user's regular full-access token. For cluster administrators that understand the risks, this protection can also be turned off.
+This can be mitigated by having separate domains for each collection, or limiting preview to circumstances where the collection is not accessed with the user's regular full-access token. For clusters where this risk is acceptable, this protection can also be turned off by setting the @Collections/TrustAllContent@ configuration flag to true; see the "configuration reference":../admin/config.html for more detail.
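+
+A minimal sketch of that @config.yml@ fragment, for clusters that accept the risk described above:
+
+<notextile>
+<pre>    Collections:
+      <code class="userinput">TrustAllContent: <b>true</b></code>
+</pre>
+</notextile>
+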
The following "same origin" URL patterns are supported for public collections and collections shared anonymously via secret links (i.e., collections which can be served by keep-web without making use of any implicit credentials like cookies). See "Same-origin URLs" below.
This mainly affects Workbench's ability to show inline content, so it should be taken into account when configuring both services' URL schemes.
-You can read more about the definition of a _same-site_ request at the "RFC 6265bis-03 page":https://tools.ietf.org/html/draft-ietf-httpbis-rfc6265bis-03#section-5.2
\ No newline at end of file
+You can read more about the definition of a _same-site_ request at the "RFC 6265bis-03 page":https://tools.ietf.org/html/draft-ietf-httpbis-rfc6265bis-03#section-5.2
--- /dev/null
+---
+layout: default
+navsection: installguide
+title: Install the LSF dispatcher
+...
+{% comment %}
+Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: CC-BY-SA-3.0
+{% endcomment %}
+
+{% include 'notebox_begin_warning' %}
+arvados-dispatch-lsf is only relevant for on-premises clusters that will spool jobs to LSF. Skip this section if you are installing a cloud cluster.
+{% include 'notebox_end' %}
+
+Containers can be dispatched to an LSF cluster. The dispatcher sends work to the cluster using LSF's @bsub@ command, so it works in a variety of LSF configurations.
+
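+As a rough illustration, a submission generated by the dispatcher looks something like the following (the container UUID and the memory, tmp, and core values here are hypothetical; they are filled in from each container's UUID and runtime constraints, and the @sudo@ prefix depends on the @BsubSudoUser@ setting described below):
+
+<notextile>
+<pre><code>sudo -E -u crunch bsub -J zzzzz-dz642-xxxxxxxxxxxxxxx -R "rusage[mem=11701MB:tmp=45777MB] affinity[core(4)]"
+</code></pre>
+</notextile>
+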
+*LSF support is currently considered experimental.*
+
+Limitations include:
+* Arvados container priority is not propagated to LSF job priority. This can cause inefficient use of compute resources, and even deadlock if there are fewer compute nodes than concurrent Arvados workflows.
+* Combining LSF with Docker may not work, depending on LSF configuration and user/group IDs (if LSF only sets up the configured user's primary group ID when executing the crunch-run process on a compute node, it may not have permission to connect to the Docker daemon).
+
+In order to run containers, you must choose a user that has permission to set up FUSE mounts and run Singularity/Docker containers on each compute node. This install guide refers to this user as the @crunch@ user. We recommend you create this user on each compute node with the same UID and GID, and add it to the @fuse@ and @docker@ system groups to grant it the necessary permissions. However, you can run the dispatcher under any account with sufficient permissions across the cluster.
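+
+For example, a minimal sketch of creating such an account (the UID/GID value @1117@ is an arbitrary placeholder; pick an unused value, use the same one on every node, and note that the @fuse@ and @docker@ groups must already exist there):
+
+<notextile>
+<pre><code class="userinput">groupadd --gid 1117 crunch
+useradd --uid 1117 --gid 1117 --groups fuse,docker crunch
+</code></pre>
+</notextile>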
+
+Set up all of your compute nodes "as you would for a SLURM cluster":../crunch2-slurm/install-compute-node.html.
+
+
+h2(#update-config). Update config.yml
+
+Arvados-dispatch-lsf reads the common configuration file at @/etc/arvados/config.yml@.
+
+Review the following configuration parameters and adjust as needed.
+
+
+h3(#BsubSudoUser). Containers.LSF.BsubSudoUser
+
+arvados-dispatch-lsf uses @sudo@ to execute @bsub@, for example @sudo -E -u crunch bsub [...]@. This means the @crunch@ account must exist on the hosts where LSF jobs run ("execution hosts"), as well as on the host where you are installing the Arvados LSF dispatcher (the "submission host"). To use a user account other than @crunch@, configure @BsubSudoUser@:
+
+<notextile>
+<pre> Containers:
+ LSF:
+ <code class="userinput">BsubSudoUser: <b>lsfuser</b>
+</code></pre>
+</notextile>
+
+Alternatively, you can arrange for the arvados-dispatch-lsf process to run as an unprivileged user that has a corresponding account on all compute nodes, and disable the use of @sudo@ by specifying an empty string:
+
+<notextile>
+<pre> Containers:
+ LSF:
+ # Don't use sudo
+ <code class="userinput">BsubSudoUser: <b>""</b>
+</code></pre>
+</notextile>
+
+
+h3(#SbatchArguments). Containers.LSF.BsubArgumentsList
+
+When arvados-dispatch-lsf invokes @bsub@, you can add arguments to the command by specifying @BsubArgumentsList@. You can use this to send the jobs to specific cluster partitions or add resource requests. Set @BsubArgumentsList@ to an array of strings. For example:
+
+<notextile>
+<pre> Containers:
+ LSF:
+ <code class="userinput">BsubArgumentsList: <b>["-C", "0"]</b></code>
+</pre>
+</notextile>
+
+
+h3(#PollPeriod). Containers.PollInterval
+
+arvados-dispatch-lsf polls the API server periodically for new containers to run. The @PollInterval@ option controls how often this poll happens. Set this to a number suffixed with one of the time units @s@, @m@, or @h@. For example:
+
+<notextile>
+<pre> Containers:
+ <code class="userinput">PollInterval: <b>10s</b>
+</code></pre>
+</notextile>
+
+
+h3(#ReserveExtraRAM). Containers.ReserveExtraRAM: Extra RAM for jobs
+
+Extra RAM to reserve (in bytes) on each LSF job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero.
+
+Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB@, @EB@, @EiB@ (where @KB@ is 10[^3^], @KiB@ is 2[^10^], @MB@ is 10[^6^], @MiB@ is 2[^20^] and so forth).
+
+<notextile>
+<pre> Containers:
+ <code class="userinput">ReserveExtraRAM: <b>256MiB</b></code>
+</pre>
+</notextile>
+
+
+h3(#CrunchRunCommand-network). Containers.CrunchRunArgumentList: Using host networking for containers
+
+Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups. This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net". If you are experiencing this problem, as a workaround you can disable use of network namespaces by Docker across the cluster. Be aware this reduces container isolation, which may be a security risk.
+
+<notextile>
+<pre> Containers:
+ <code class="userinput">CrunchRunArgumentsList:
+ - <b>"-container-enable-networking=always"</b>
+ - <b>"-container-network-mode=host"</b></code>
+</pre>
+</notextile>
+
+{% assign arvados_component = 'arvados-dispatch-lsf' %}
+
+{% include 'install_packages' %}
+
+{% include 'start_service' %}
+
+{% include 'restart_api' %}
h3(#ReserveExtraRAM). Containers.ReserveExtraRAM: Extra RAM for jobs
-Extra RAM to reserve (in bytes) on each Slurm job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero. Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process. In this situation, at least 256MiB is recommended to accomodate each container's @crunch-run@ and @arv-mount@ processes.
+Extra RAM to reserve (in bytes) on each Slurm job submitted by Arvados, which is added to the amount specified in the container's @runtime_constraints@. If not provided, the default value is zero. Helpful when using @-cgroup-parent-subsystem@, where @crunch-run@ and @arv-mount@ share the control group memory limit with the user process. In this situation, at least 256MiB is recommended to accommodate each container's @crunch-run@ and @arv-mount@ processes.
Supports suffixes @KB@, @KiB@, @MB@, @MiB@, @GB@, @GiB@, @TB@, @TiB@, @PB@, @PiB@, @EB@, @EiB@ (where @KB@ is 10[^3^], @KiB@ is 2[^10^], @MB@ is 10[^6^], @MiB@ is 2[^20^] and so forth).
{% include 'notebox_begin' %}
Whether you choose to serve collections from their own subdomain or from a single domain, it's important to keep in mind that they should be served from the same _site_ as Workbench for the inline previews to work.
-Please check "keep-web's URL pattern guide":/api/keep-web-urls.html#same-site to learn more.
+Please check "keep-web's URL pattern guide":../api/keep-web-urls.html#same-site to learn more.
{% include 'notebox_end' %}
h2. Set InternalURLs
table(table table-bordered table-condensed).
|_. Distribution|_. State|_. Last supported version|
|CentOS 7|Supported|Latest|
+|Debian 11 ("bullseye")|Supported|Latest|
|Debian 10 ("buster")|Supported|Latest|
|Ubuntu 20.04 ("focal")|Supported|Latest|
|Ubuntu 18.04 ("bionic")|Supported|Latest|
h2(#dnstls). DNS entries and TLS certificates
-The following services are normally public-facing and require DNS entries and corresponding TLS certificates. Get certificates from your preferred TLS certificate provider. We recommend using "Let's Encrypt":https://letsencrypt.org/. You can run several services on same node, but each distinct hostname requires its own TLS certificate.
+The following services are normally public-facing and require DNS entries and corresponding TLS certificates. Get certificates from your preferred TLS certificate provider. We recommend using "Let's Encrypt":https://letsencrypt.org/. You can run several services on the same node, but each distinct DNS name requires a valid, matching TLS certificate.
-This guide uses the following hostname conventions. A later part of this guide will describe how to set up Nginx virtual hosts.
+This guide uses the following DNS name conventions. A later part of this guide will describe how to set up Nginx virtual hosts.
<div class="offset1">
table(table table-bordered table-condensed).
-|_. Function|_. Hostname|
+|_. Function|_. DNS name|
|Arvados API|@ClusterID.example.com@|
|Arvados Git server|git.@ClusterID.example.com@|
+|Arvados Webshell|webshell.@ClusterID.example.com@|
|Arvados Websockets endpoint|ws.@ClusterID.example.com@|
|Arvados Workbench|workbench.@ClusterID.example.com@|
|Arvados Workbench 2|workbench2.@ClusterID.example.com@|
|Arvados Keepproxy server|keep.@ClusterID.example.com@|
|Arvados Keep-web server|download.@ClusterID.example.com@
_and_
-*.collections.@ClusterID.example.com@ or
-*<notextile>--</notextile>collections.@ClusterID.example.com@ or
+*.collections.@ClusterID.example.com@ _or_
+*<notextile>--</notextile>collections.@ClusterID.example.com@ _or_
collections.@ClusterID.example.com@ (see the "keep-web install docs":install-keep-web.html)|
</div>
+Setting up Arvados is easiest when wildcard TLS certificates and wildcard DNS are available. It is also possible to set up Arvados without them, but not having a wildcard for @keep-web@ (i.e. not having *.collections.@ClusterID.example.com@) comes with a tradeoff: it will disable some features that allow users to view Arvados-hosted data in their browsers. More information on this tradeoff, which is a consequence of the CORS rules applied by modern browsers, is available in the "keep-web URL pattern guide":../api/keep-web-urls.html.
+
+The table below lists the required TLS certificates and DNS names in each scenario.
+
+<div class="offset1">
+table(table table-bordered table-condensed).
+||_. Wildcard TLS and DNS available|_. Wildcard TLS available|_. Other|
+|TLS|*.@ClusterID.example.com@
+@ClusterID.example.com@
+*.collections.@ClusterID.example.com@|*.@ClusterID.example.com@
+@ClusterID.example.com@|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+collections.@ClusterID.example.com@|
+|DNS|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+*.collections.@ClusterID.example.com@|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+collections.@ClusterID.example.com@|@ClusterID.example.com@
+git.@ClusterID.example.com@
+webshell.@ClusterID.example.com@
+ws.@ClusterID.example.com@
+workbench.@ClusterID.example.com@
+workbench2.@ClusterID.example.com@
+keep.@ClusterID.example.com@
+download.@ClusterID.example.com@
+collections.@ClusterID.example.com@|
+</div>
+
{% include 'notebox_begin' %}
It is also possible to create your own certificate authority, issue server certificates, and install a custom root certificate in the browser. This is out of scope for this guide.
{% include 'notebox_end' %}
table(table table-bordered table-condensed).
|_. OS version|_. Command|
+|Debian 11 ("bullseye")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/buster bullseye main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Debian 10 ("buster")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/buster buster main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Ubuntu 20.04 ("focal")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/focal focal main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
|Ubuntu 18.04 ("bionic")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/bionic bionic main" | tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
DispatchCloud:
InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
+ DispatchLSF:
+ InternalURLs: {SAMPLE: {}}
+ ExternalURL: "-"
Keepproxy:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
# WebDAV would have to expose XSS vulnerabilities in order to
# handle the redirect (see discussion on Services.WebDAV).
#
- # This setting has no effect in the recommended configuration,
- # where the WebDAV is configured to have a separate domain for
- # every collection; in this case XSS protection is provided by
- # browsers' same-origin policy.
+ # This setting has no effect in the recommended configuration, where the
+ # WebDAV service is configured to have a separate domain for every
+ # collection and XSS protection is provided by browsers' same-origin
+ # policy.
#
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
# (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
AssignNodeHostname: "compute%<slot_number>d"
+ LSF:
+ # Additional arguments to bsub when submitting Arvados
+ # containers as LSF jobs.
+ BsubArgumentsList: []
+
+ # Use sudo to switch to this user account when submitting LSF
+ # jobs.
+ #
+ # This account must exist on the hosts where LSF jobs run
+ # ("execution hosts"), as well as on the host where the
+ # Arvados LSF dispatcher runs ("submission host").
+ BsubSudoUser: "crunch"
+
JobsAPI:
# Enable the legacy 'jobs' API (crunch v1). This value must be a string.
#
"Containers.JobsAPI.GitInternalDir": false,
"Containers.Logging": false,
"Containers.LogReuseDecisions": false,
+ "Containers.LSF": false,
"Containers.MaxComputeVMs": false,
"Containers.MaxDispatchAttempts": false,
"Containers.MaxRetryAttempts": true,
DispatchCloud:
InternalURLs: {SAMPLE: {}}
ExternalURL: "-"
+ DispatchLSF:
+ InternalURLs: {SAMPLE: {}}
+ ExternalURL: "-"
Keepproxy:
InternalURLs: {SAMPLE: {}}
ExternalURL: ""
# WebDAV would have to expose XSS vulnerabilities in order to
# handle the redirect (see discussion on Services.WebDAV).
#
- # This setting has no effect in the recommended configuration,
- # where the WebDAV is configured to have a separate domain for
- # every collection; in this case XSS protection is provided by
- # browsers' same-origin policy.
+ # This setting has no effect in the recommended configuration, where the
+ # WebDAV service is configured to have a separate domain for every
+ # collection and XSS protection is provided by browsers' same-origin
+ # policy.
#
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
# (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
AssignNodeHostname: "compute%<slot_number>d"
+ LSF:
+ # Additional arguments to bsub when submitting Arvados
+ # containers as LSF jobs.
+ BsubArgumentsList: []
+
+ # Use sudo to switch to this user account when submitting LSF
+ # jobs.
+ #
+ # This account must exist on the hosts where LSF jobs run
+ # ("execution hosts"), as well as on the host where the
+ # Arvados LSF dispatcher runs ("submission host").
+ BsubSudoUser: "crunch"
+
JobsAPI:
# Enable the legacy 'jobs' API (crunch v1). This value must be a string.
#
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadostest"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "git.arvados.org/arvados.git/sdk/go/httpserver"
check "gopkg.in/check.v1"
)
}
}
+func (s *IntegrationSuite) TestRequestIDHeader(c *check.C) {
+ conn1 := s.testClusters["z1111"].Conn()
+ rootctx1, _, _ := s.testClusters["z1111"].RootClients()
+ userctx1, ac1, _, _ := s.testClusters["z1111"].UserClients(rootctx1, c, conn1, "user@example.com", true)
+
+ coll, err := conn1.CollectionCreate(userctx1, arvados.CreateOptions{})
+ c.Check(err, check.IsNil)
+ specimen, err := conn1.SpecimenCreate(userctx1, arvados.CreateOptions{})
+ c.Check(err, check.IsNil)
+
+ tests := []struct {
+ path string
+ reqIdProvided bool
+ notFoundRequest bool
+ }{
+ {"/arvados/v1/collections", false, false},
+ {"/arvados/v1/collections", true, false},
+ {"/arvados/v1/nonexistant", false, true},
+ {"/arvados/v1/nonexistant", true, true},
+ {"/arvados/v1/collections/" + coll.UUID, false, false},
+ {"/arvados/v1/collections/" + coll.UUID, true, false},
+ {"/arvados/v1/specimens/" + specimen.UUID, false, false},
+ {"/arvados/v1/specimens/" + specimen.UUID, true, false},
+ {"/arvados/v1/collections/z1111-4zz18-0123456789abcde", false, true},
+ {"/arvados/v1/collections/z1111-4zz18-0123456789abcde", true, true},
+ {"/arvados/v1/specimens/z1111-j58dm-0123456789abcde", false, true},
+ {"/arvados/v1/specimens/z1111-j58dm-0123456789abcde", true, true},
+ }
+
+ for _, tt := range tests {
+ c.Log(c.TestName() + " " + tt.path)
+ req, err := http.NewRequest("GET", "https://"+ac1.APIHost+tt.path, nil)
+ c.Assert(err, check.IsNil)
+ customReqId := "abcdeG"
+ if !tt.reqIdProvided {
+ c.Assert(req.Header.Get("X-Request-Id"), check.Equals, "")
+ } else {
+ req.Header.Set("X-Request-Id", customReqId)
+ }
+ resp, err := ac1.Do(req)
+ c.Assert(err, check.IsNil)
+ if tt.notFoundRequest {
+ c.Check(resp.StatusCode, check.Equals, http.StatusNotFound)
+ } else {
+ c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+ }
+ if !tt.reqIdProvided {
+ c.Check(resp.Header.Get("X-Request-Id"), check.Matches, "^req-[0-9a-zA-Z]{20}$")
+ if tt.notFoundRequest {
+ var jresp httpserver.ErrorResponse
+ err := json.NewDecoder(resp.Body).Decode(&jresp)
+ c.Check(err, check.IsNil)
+ c.Assert(jresp.Errors, check.HasLen, 1)
+ c.Check(jresp.Errors[0], check.Matches, "^.*(req-[0-9a-zA-Z]{20}).*$")
+ }
+ } else {
+ c.Check(resp.Header.Get("X-Request-Id"), check.Equals, customReqId)
+ if tt.notFoundRequest {
+ var jresp httpserver.ErrorResponse
+ err := json.NewDecoder(resp.Body).Decode(&jresp)
+ c.Check(err, check.IsNil)
+ c.Assert(jresp.Errors, check.HasLen, 1)
+ c.Check(jresp.Errors[0], check.Matches, "^.*("+customReqId+").*$")
+ }
+ }
+ }
+}
+
// We test the direct access to the database
// normally an integration test would not have a database access, but in this case we need
// to test tokens that are secret, so there is no API response that will give them back
return "", fmt.Errorf("cannot choose from multiple tar files in image collection: %v", tarfiles)
}
imageID := tarfiles[0][:len(tarfiles[0])-4]
- imageFile := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + tarfiles[0]
+ imageTarballPath := runner.ArvMountPoint + "/by_id/" + runner.Container.ContainerImage + "/" + imageID + ".tar"
runner.CrunchLog.Printf("Using Docker image id %q", imageID)
- if !runner.executor.ImageLoaded(imageID) {
- runner.CrunchLog.Print("Loading Docker image from keep")
- err = runner.executor.LoadImage(imageFile)
- if err != nil {
- return "", err
- }
- } else {
- runner.CrunchLog.Print("Docker image is available")
+ runner.CrunchLog.Print("Loading Docker image from keep")
+ err = runner.executor.LoadImage(imageID, imageTarballPath, runner.Container, runner.ArvMountPoint,
+ runner.containerClient)
+ if err != nil {
+ return "", err
}
+
return imageID, nil
}
} else {
arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
}
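+	// Also mount collections by UUID: the singularity executor reads and
+	// writes cached SIF images via .../by_uuid/<collection uuid>/.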
+ arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_uuid")
arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
runner.ArvMount, err = runner.RunArvMount(arvMountCmd, token)
}
}
}
+ runner.ArvMount = nil
}
if runner.ArvMountPoint != "" {
if rmerr := os.Remove(runner.ArvMountPoint); rmerr != nil {
runner.CrunchLog.Printf("While cleaning up arv-mount directory %s: %v", runner.ArvMountPoint, rmerr)
}
+ runner.ArvMountPoint = ""
}
if rmerr := os.RemoveAll(runner.parentTemp); rmerr != nil {
}
checkErr("stopHoststat", runner.stopHoststat())
checkErr("CommitLogs", runner.CommitLogs())
+ runner.CleanupDirs()
checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
}()
exit chan int
}
-func (e *stubExecutor) ImageLoaded(imageID string) bool { return e.imageLoaded }
-func (e *stubExecutor) LoadImage(filename string) error { e.loaded = filename; return e.loadErr }
+func (e *stubExecutor) LoadImage(imageId string, tarball string, container arvados.Container, keepMount string,
+ containerClient *arvados.Client) error {
+ e.loaded = tarball
+ return e.loadErr
+}
func (e *stubExecutor) Create(spec containerSpec) error { e.created = spec; return e.createErr }
func (e *stubExecutor) Start() error { e.exit = make(chan int, 1); go e.runFunc(); return e.startErr }
func (e *stubExecutor) CgroupID() string { return "cgroupid" }
imageID, err = s.runner.LoadImage()
c.Check(err, ErrorMatches, "image collection does not include a \\.tar image file")
c.Check(s.executor.loaded, Equals, "")
-
- // if executor reports image is already loaded, LoadImage should not be called
- s.runner.Container.ContainerImage = arvadostest.DockerImage112PDH
- s.executor.imageLoaded = true
- s.executor.loaded = ""
- s.executor.loadErr = nil
- imageID, err = s.runner.LoadImage()
- c.Check(err, IsNil)
- c.Check(s.executor.loaded, Equals, "")
- c.Check(imageID, Equals, strings.TrimSuffix(arvadostest.DockerImage112Filename, ".tar"))
}
type ArvErrorTestClient struct{}
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "foo,bar", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/out": {realTemp + "/tmp2", false}, "/tmp": {realTemp + "/tmp3", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/tmp": {realTemp + "/tmp2", false}, "/etc/arvados/ca-certificates.crt": {stubCertPath, true}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{"/keeptmp": {realTemp + "/keep1/tmp0", false}})
os.RemoveAll(cr.ArvMountPoint)
cr.CleanupDirs()
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
"/keepout": {realTemp + "/keep1/tmp0", false},
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/keepinp": {realTemp + "/keep1/by_id/59389a8f9ee9d399be35462a0f92541c+53", true},
"/keepout": {realTemp + "/keep1/tmp0", false},
c.Check(err, IsNil)
c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other",
"--read-write", "--storage-classes", "default", "--crunchstat-interval=5",
- "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ "--file-cache", "512", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "--mount-by-id", "by_uuid", realTemp + "/keep1"})
c.Check(bindmounts, DeepEquals, map[string]bindmount{
"/tmp": {realTemp + "/tmp2", false},
"/tmp/foo": {realTemp + "/keep1/tmp0", true},
"strings"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
dockertypes "github.com/docker/docker/api/types"
dockercontainer "github.com/docker/docker/api/types/container"
dockerclient "github.com/docker/docker/client"
}, err
}
-func (e *dockerExecutor) ImageLoaded(imageID string) bool {
+func (e *dockerExecutor) LoadImage(imageID string, imageTarballPath string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) error {
_, _, err := e.dockerclient.ImageInspectWithRaw(context.TODO(), imageID)
- return err == nil
-}
+ if err == nil {
+ // already loaded
+ return nil
+ }
-func (e *dockerExecutor) LoadImage(filename string) error {
- f, err := os.Open(filename)
+ f, err := os.Open(imageTarballPath)
if err != nil {
return err
}
import (
"io"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
)
// containerExecutor is an interface to a container runtime
// (docker/singularity).
type containerExecutor interface {
- // ImageLoaded determines whether the given image is already
- // available to use without calling ImageLoad.
- ImageLoaded(imageID string) bool
-
	// LoadImage loads the image from the given tarball such that
// it can be used to create/start a container.
- LoadImage(filename string) error
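+	//
+	// The container record, Keep mount path, and API client are provided
+	// so that an executor can cache a converted image in Keep (as the
+	// singularity executor does); other executors may ignore them.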
+ LoadImage(imageID string, imageTarballPath string, container arvados.Container, keepMount string,
+ containerClient *arvados.Client) error
// Wait for the container process to finish, and return its
// exit code. If applicable, also remove the stopped container
"strings"
"time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
. "gopkg.in/check.v1"
)
Stdout: nopWriteCloser{&s.stdout},
Stderr: nopWriteCloser{&s.stderr},
}
- err := s.executor.LoadImage(busyboxDockerImage(c))
+ err := s.executor.LoadImage("", busyboxDockerImage(c), arvados.Container{}, "", nil)
c.Assert(err, IsNil)
}
package crunchrun
import (
+ "fmt"
"io/ioutil"
"os"
"os/exec"
"sort"
"syscall"
+ "time"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
"golang.org/x/net/context"
)
}, nil
}
-func (e *singularityExecutor) ImageLoaded(string) bool {
- return false
+func (e *singularityExecutor) getOrCreateProject(ownerUuid string, name string, containerClient *arvados.Client) (*arvados.Group, error) {
+ var gp arvados.GroupList
+ err := containerClient.RequestAndDecode(&gp,
+ arvados.EndpointGroupList.Method,
+ arvados.EndpointGroupList.Path,
+ nil, arvados.ListOptions{Filters: []arvados.Filter{
+ arvados.Filter{"owner_uuid", "=", ownerUuid},
+ arvados.Filter{"name", "=", name},
+ arvados.Filter{"group_class", "=", "project"},
+ },
+ Limit: 1})
+ if err != nil {
+ return nil, err
+ }
+ if len(gp.Items) == 1 {
+ return &gp.Items[0], nil
+ }
+
+ var rgroup arvados.Group
+ err = containerClient.RequestAndDecode(&rgroup,
+ arvados.EndpointGroupCreate.Method,
+ arvados.EndpointGroupCreate.Path,
+ nil, map[string]interface{}{
+ "group": map[string]string{
+ "owner_uuid": ownerUuid,
+ "name": name,
+ "group_class": "project",
+ },
+ })
+ if err != nil {
+ return nil, err
+ }
+ return &rgroup, nil
+}
+
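+// checkImageCache returns the Keep collection that caches (or will cache)
+// the SIF image for dockerImageID. The collection lives in the container
+// user's ".cache" project, under "auto-generated singularity images"; a
+// newly created collection gets a trash_at time two weeks in the future.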
+func (e *singularityExecutor) checkImageCache(dockerImageID string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) (collection *arvados.Collection, err error) {
+
+ // Cache the image to keep
+ cacheGroup, err := e.getOrCreateProject(container.RuntimeUserUUID, ".cache", containerClient)
+ if err != nil {
+ return nil, fmt.Errorf("error getting '.cache' project: %v", err)
+ }
+ imageGroup, err := e.getOrCreateProject(cacheGroup.UUID, "auto-generated singularity images", containerClient)
+ if err != nil {
+ return nil, fmt.Errorf("error getting 'auto-generated singularity images' project: %s", err)
+ }
+
+ collectionName := fmt.Sprintf("singularity image for %v", dockerImageID)
+ var cl arvados.CollectionList
+ err = containerClient.RequestAndDecode(&cl,
+ arvados.EndpointCollectionList.Method,
+ arvados.EndpointCollectionList.Path,
+ nil, arvados.ListOptions{Filters: []arvados.Filter{
+ arvados.Filter{"owner_uuid", "=", imageGroup.UUID},
+ arvados.Filter{"name", "=", collectionName},
+ },
+ Limit: 1})
+ if err != nil {
+ return nil, fmt.Errorf("error querying for collection '%v': %v", collectionName, err)
+ }
+ var imageCollection arvados.Collection
+ if len(cl.Items) == 1 {
+ imageCollection = cl.Items[0]
+ } else {
+ collectionName := collectionName + " " + time.Now().UTC().Format(time.RFC3339)
+ exp := time.Now().Add(24 * 7 * 2 * time.Hour)
+ err = containerClient.RequestAndDecode(&imageCollection,
+ arvados.EndpointCollectionCreate.Method,
+ arvados.EndpointCollectionCreate.Path,
+ nil, map[string]interface{}{
+ "collection": map[string]string{
+ "owner_uuid": imageGroup.UUID,
+ "name": collectionName,
+ "trash_at": exp.UTC().Format(time.RFC3339),
+ },
+ })
+ if err != nil {
+ return nil, fmt.Errorf("error creating '%v' collection: %s", collectionName, err)
+ }
+
+ }
+
+ return &imageCollection, nil
}
// LoadImage satisfies the containerExecutor interface by transforming
// containerImage into a sif file for later use.
-func (e *singularityExecutor) LoadImage(imageTarballPath string) error {
- e.logf("building singularity image")
- // "singularity build" does not accept a
- // docker-archive://... filename containing a ":" character,
- // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a
- // symlink that doesn't have ":" chars.
- err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar")
+func (e *singularityExecutor) LoadImage(dockerImageID string, imageTarballPath string, container arvados.Container, arvMountPoint string,
+ containerClient *arvados.Client) error {
+
+ var imageFilename string
+ var sifCollection *arvados.Collection
+ var err error
+ if containerClient != nil {
+ sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient)
+ if err != nil {
+ return err
+ }
+ imageFilename = fmt.Sprintf("%s/by_uuid/%s/image.sif", arvMountPoint, sifCollection.UUID)
+ } else {
+ imageFilename = e.tmpdir + "/image.sif"
+ }
+
+ if _, err := os.Stat(imageFilename); os.IsNotExist(err) {
+ e.logf("building singularity image")
+ // "singularity build" does not accept a
+ // docker-archive://... filename containing a ":" character,
+ // as in "/path/to/sha256:abcd...1234.tar". Workaround: make a
+ // symlink that doesn't have ":" chars.
+ err := os.Symlink(imageTarballPath, e.tmpdir+"/image.tar")
+ if err != nil {
+ return err
+ }
+
+ build := exec.Command("singularity", "build", imageFilename, "docker-archive://"+e.tmpdir+"/image.tar")
+ e.logf("%v", build.Args)
+ out, err := build.CombinedOutput()
+ // INFO: Starting build...
+ // Getting image source signatures
+ // Copying blob ab15617702de done
+ // Copying config 651e02b8a2 done
+ // Writing manifest to image destination
+ // Storing signatures
+ // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3
+ // INFO: Creating SIF file...
+ // INFO: Build complete: arvados-jobs.latest.sif
+ e.logf("%s", out)
+ if err != nil {
+ return err
+ }
+ }
+
+ if containerClient == nil {
+ e.imageFilename = imageFilename
+ return nil
+ }
+
+ // update TTL to now + two weeks
+ exp := time.Now().Add(24 * 7 * 2 * time.Hour)
+
+ uuidPath, err := containerClient.PathForUUID("update", sifCollection.UUID)
if err != nil {
- return err
+ e.logf("error PathForUUID: %v", err)
+ return nil
+ }
+ var imageCollection arvados.Collection
+ err = containerClient.RequestAndDecode(&imageCollection,
+ arvados.EndpointCollectionUpdate.Method,
+ uuidPath,
+ nil, map[string]interface{}{
+ "collection": map[string]string{
+ "name": fmt.Sprintf("singularity image for %v", dockerImageID),
+ "trash_at": exp.UTC().Format(time.RFC3339),
+ },
+ })
+ if err == nil {
+ // If we just wrote the image to the cache, the
+ // response also returns the updated PDH
+ e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, imageCollection.PortableDataHash)
+ return nil
}
- e.imageFilename = e.tmpdir + "/image.sif"
- build := exec.Command("singularity", "build", e.imageFilename, "docker-archive://"+e.tmpdir+"/image.tar")
- e.logf("%v", build.Args)
- out, err := build.CombinedOutput()
- // INFO: Starting build...
- // Getting image source signatures
- // Copying blob ab15617702de done
- // Copying config 651e02b8a2 done
- // Writing manifest to image destination
- // Storing signatures
- // 2021/04/22 14:42:14 info unpack layer: sha256:21cbfd3a344c52b197b9fa36091e66d9cbe52232703ff78d44734f85abb7ccd3
- // INFO: Creating SIF file...
- // INFO: Build complete: arvados-jobs.latest.sif
- e.logf("%s", out)
+
+ e.logf("error updating/renaming collection for cached sif image: %v", err)
+ // Failed to update but maybe it lost a race and there is
+ // another cached collection in the same place, so check the cache
+ // again
+ sifCollection, err = e.checkImageCache(dockerImageID, container, arvMountPoint, containerClient)
if err != nil {
return err
}
+ e.imageFilename = fmt.Sprintf("%s/by_id/%s/image.sif", arvMountPoint, sifCollection.PortableDataHash)
+
return nil
}
mount := e.spec.BindMounts[path]
args = append(args, "--bind", mount.HostPath+":"+path+":"+readonlyflag[mount.ReadOnly])
}
- args = append(args, e.imageFilename)
- args = append(args, e.spec.Command...)
// This is for singularity 3.5.2. There are some behaviors
// that will change in singularity 3.6, please see:
// https://sylabs.io/guides/3.5/user-guide/environment_and_metadata.html
env := make([]string, 0, len(e.spec.Env))
for k, v := range e.spec.Env {
- env = append(env, "SINGULARITYENV_"+k+"="+v)
+ if k == "HOME" {
+ // $HOME is a special case
+ args = append(args, "--home="+v)
+ } else {
+ env = append(env, "SINGULARITYENV_"+k+"="+v)
+ }
}
+ args = append(args, e.imageFilename)
+ args = append(args, e.spec.Command...)
+
path, err := exec.LookPath(args[0])
if err != nil {
return err
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "context"
+ "errors"
+ "fmt"
+ "math"
+ "net/http"
+ "regexp"
+ "strings"
+ "sync"
+ "time"
+
+ "git.arvados.org/arvados.git/lib/cmd"
+ "git.arvados.org/arvados.git/lib/dispatchcloud"
+ "git.arvados.org/arvados.git/lib/service"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/auth"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "git.arvados.org/arvados.git/sdk/go/dispatch"
+ "git.arvados.org/arvados.git/sdk/go/health"
+ "github.com/julienschmidt/httprouter"
+ "github.com/prometheus/client_golang/prometheus"
+ "github.com/prometheus/client_golang/prometheus/promhttp"
+ "github.com/sirupsen/logrus"
+)
+
+var DispatchCommand cmd.Handler = service.Command(arvados.ServiceNameDispatchLSF, newHandler)
+
+func newHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
+ ac, err := arvados.NewClientFromConfig(cluster)
+ if err != nil {
+ return service.ErrorHandler(ctx, cluster, fmt.Errorf("error initializing client from cluster config: %s", err))
+ }
+ d := &dispatcher{
+ Cluster: cluster,
+ Context: ctx,
+ ArvClient: ac,
+ AuthToken: token,
+ Registry: reg,
+ }
+ go d.Start()
+ return d
+}
+
+type dispatcher struct {
+ Cluster *arvados.Cluster
+ Context context.Context
+ ArvClient *arvados.Client
+ AuthToken string
+ Registry *prometheus.Registry
+
+ logger logrus.FieldLogger
+ lsfcli lsfcli
+ lsfqueue lsfqueue
+ arvDispatcher *dispatch.Dispatcher
+ httpHandler http.Handler
+
+ initOnce sync.Once
+ stop chan struct{}
+ stopped chan struct{}
+}
+
+// Start starts the dispatcher. Start can be called multiple times
+// with no ill effect.
+func (disp *dispatcher) Start() {
+ disp.initOnce.Do(func() {
+ disp.init()
+ go func() {
+ disp.checkLsfQueueForOrphans()
+ err := disp.arvDispatcher.Run(disp.Context)
+ if err != nil {
+ disp.logger.Error(err)
+ disp.Close()
+ }
+ }()
+ })
+}
+
+// ServeHTTP implements service.Handler.
+func (disp *dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ disp.Start()
+ disp.httpHandler.ServeHTTP(w, r)
+}
+
+// CheckHealth implements service.Handler.
+func (disp *dispatcher) CheckHealth() error {
+ disp.Start()
+ select {
+ case <-disp.stopped:
+ return errors.New("stopped")
+ default:
+ return nil
+ }
+}
+
+// Done implements service.Handler.
+func (disp *dispatcher) Done() <-chan struct{} {
+ return disp.stopped
+}
+
+// Stop dispatching containers and release resources. Used by tests.
+func (disp *dispatcher) Close() {
+ disp.Start()
+ select {
+ case disp.stop <- struct{}{}:
+ default:
+ }
+ <-disp.stopped
+}
+
+func (disp *dispatcher) init() {
+ disp.logger = ctxlog.FromContext(disp.Context)
+ disp.lsfcli.logger = disp.logger
+ disp.lsfqueue = lsfqueue{
+ logger: disp.logger,
+ period: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval),
+ lsfcli: &disp.lsfcli,
+ }
+ disp.ArvClient.AuthToken = disp.AuthToken
+ disp.stop = make(chan struct{}, 1)
+ disp.stopped = make(chan struct{})
+
+ arv, err := arvadosclient.New(disp.ArvClient)
+ if err != nil {
+ disp.logger.Fatalf("Error making Arvados client: %v", err)
+ }
+ arv.Retries = 25
+ arv.ApiToken = disp.AuthToken
+ disp.arvDispatcher = &dispatch.Dispatcher{
+ Arv: arv,
+ Logger: disp.logger,
+ BatchSize: disp.Cluster.API.MaxItemsPerResponse,
+ RunContainer: disp.runContainer,
+ PollPeriod: time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval),
+ MinRetryPeriod: time.Duration(disp.Cluster.Containers.MinRetryPeriod),
+ }
+
+ if disp.Cluster.ManagementToken == "" {
+ disp.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ http.Error(w, "Management API authentication is not configured", http.StatusForbidden)
+ })
+ } else {
+ mux := httprouter.New()
+ metricsH := promhttp.HandlerFor(disp.Registry, promhttp.HandlerOpts{
+ ErrorLog: disp.logger,
+ })
+ mux.Handler("GET", "/metrics", metricsH)
+ mux.Handler("GET", "/metrics.json", metricsH)
+ mux.Handler("GET", "/_health/:check", &health.Handler{
+ Token: disp.Cluster.ManagementToken,
+ Prefix: "/_health/",
+ Routes: health.Routes{"ping": disp.CheckHealth},
+ })
+ disp.httpHandler = auth.RequireLiteralToken(disp.Cluster.ManagementToken, mux)
+ }
+}
+
+func (disp *dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
+ ctx, cancel := context.WithCancel(disp.Context)
+ defer cancel()
+
+ if ctr.State != dispatch.Locked {
+ // already started by prior invocation
+ } else if _, ok := disp.lsfqueue.JobID(ctr.UUID); !ok {
+ disp.logger.Printf("Submitting container %s to LSF", ctr.UUID)
+ cmd := []string{disp.Cluster.Containers.CrunchRunCommand}
+ cmd = append(cmd, "--runtime-engine="+disp.Cluster.Containers.RuntimeEngine)
+ cmd = append(cmd, disp.Cluster.Containers.CrunchRunArgumentsList...)
+ err := disp.submit(ctr, cmd)
+ if err != nil {
+ return err
+ }
+ }
+
+ disp.logger.Printf("Start monitoring container %v in state %q", ctr.UUID, ctr.State)
+ defer disp.logger.Printf("Done monitoring container %s", ctr.UUID)
+
+ // If the container disappears from the lsf queue, there is
+ // no point in waiting for further dispatch updates: just
+ // clean up and return.
+ go func(uuid string) {
+ for ctx.Err() == nil {
+ if _, ok := disp.lsfqueue.JobID(uuid); !ok {
+ disp.logger.Printf("container %s job disappeared from LSF queue", uuid)
+ cancel()
+ return
+ }
+ }
+ }(ctr.UUID)
+
+ for done := false; !done; {
+ select {
+ case <-ctx.Done():
+ // Disappeared from lsf queue
+ if err := disp.arvDispatcher.Arv.Get("containers", ctr.UUID, nil, &ctr); err != nil {
+ disp.logger.Printf("error getting final container state for %s: %s", ctr.UUID, err)
+ }
+ switch ctr.State {
+ case dispatch.Running:
+ disp.arvDispatcher.UpdateState(ctr.UUID, dispatch.Cancelled)
+ case dispatch.Locked:
+ disp.arvDispatcher.Unlock(ctr.UUID)
+ }
+ return nil
+ case updated, ok := <-status:
+ if !ok {
+ // status channel is closed, which is
+ // how arvDispatcher tells us to stop
+ // touching the container record, kill
+ // off any remaining LSF processes,
+ // etc.
+ done = true
+ break
+ }
+ if updated.State != ctr.State {
+ disp.logger.Infof("container %s changed state from %s to %s", ctr.UUID, ctr.State, updated.State)
+ }
+ ctr = updated
+ if ctr.Priority < 1 {
+ disp.logger.Printf("container %s has state %s, priority %d: cancel lsf job", ctr.UUID, ctr.State, ctr.Priority)
+ disp.bkill(ctr)
+ } else {
+ disp.lsfqueue.SetPriority(ctr.UUID, int64(ctr.Priority))
+ }
+ }
+ }
+ disp.logger.Printf("container %s is done", ctr.UUID)
+
+ // Try "bkill" every few seconds until the LSF job disappears
+ // from the queue.
+ ticker := time.NewTicker(5 * time.Second)
+ defer ticker.Stop()
+ for jobid, ok := disp.lsfqueue.JobID(ctr.UUID); ok; _, ok = disp.lsfqueue.JobID(ctr.UUID) {
+ err := disp.lsfcli.Bkill(jobid)
+ if err != nil {
+ disp.logger.Warnf("%s: bkill(%d): %s", ctr.UUID, jobid, err)
+ }
+ <-ticker.C
+ }
+ return nil
+}
+
+func (disp *dispatcher) submit(container arvados.Container, crunchRunCommand []string) error {
+ // Start with an empty slice here to ensure append() doesn't
+ // modify crunchRunCommand's underlying array
+ var crArgs []string
+ crArgs = append(crArgs, crunchRunCommand...)
+ crArgs = append(crArgs, container.UUID)
+ crScript := execScript(crArgs)
+
+ bsubArgs, err := disp.bsubArgs(container)
+ if err != nil {
+ return err
+ }
+ return disp.lsfcli.Bsub(crScript, bsubArgs, disp.ArvClient)
+}
+
+func (disp *dispatcher) bkill(ctr arvados.Container) {
+ if jobid, ok := disp.lsfqueue.JobID(ctr.UUID); !ok {
+ disp.logger.Debugf("bkill(%s): redundant, job not in queue", ctr.UUID)
+ } else if err := disp.lsfcli.Bkill(jobid); err != nil {
+ disp.logger.Warnf("%s: bkill(%d): %s", ctr.UUID, jobid, err)
+ }
+}
+
+func (disp *dispatcher) bsubArgs(container arvados.Container) ([]string, error) {
+ args := []string{"bsub"}
+ args = append(args, disp.Cluster.Containers.LSF.BsubArgumentsList...)
+ args = append(args, "-J", container.UUID)
+ args = append(args, disp.bsubConstraintArgs(container)...)
+ if u := disp.Cluster.Containers.LSF.BsubSudoUser; u != "" {
+ args = append([]string{"sudo", "-E", "-u", u}, args...)
+ }
+ return args, nil
+}
+
+func (disp *dispatcher) bsubConstraintArgs(container arvados.Container) []string {
+ // TODO: propagate container.SchedulingParameters.Partitions
+ tmp := int64(math.Ceil(float64(dispatchcloud.EstimateScratchSpace(&container)) / 1048576))
+ vcpus := container.RuntimeConstraints.VCPUs
+ mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+
+ container.RuntimeConstraints.KeepCacheRAM+
+ int64(disp.Cluster.Containers.ReserveExtraRAM)) / 1048576))
+ return []string{
+ "-R", fmt.Sprintf("rusage[mem=%dMB:tmp=%dMB] affinity[core(%d)]", mem, tmp, vcpus),
+ }
+}
+
+// Check the next bjobs report, and invoke TrackContainer for all the
+// containers in the report. This gives us a chance to cancel existing
+// Arvados LSF jobs (started by a previous dispatch process) that
+// never released their LSF job allocations even though their
+// container states are Cancelled or Complete. See
+// https://dev.arvados.org/issues/10979
+func (disp *dispatcher) checkLsfQueueForOrphans() {
+ containerUuidPattern := regexp.MustCompile(`^[a-z0-9]{5}-dz642-[a-z0-9]{15}$`)
+ for _, uuid := range disp.lsfqueue.All() {
+ if !containerUuidPattern.MatchString(uuid) || !strings.HasPrefix(uuid, disp.Cluster.ClusterID) {
+ continue
+ }
+ err := disp.arvDispatcher.TrackContainer(uuid)
+ if err != nil {
+ disp.logger.Warnf("checkLsfQueueForOrphans: TrackContainer(%s): %s", uuid, err)
+ }
+ }
+}
+
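+// execScript returns a /bin/sh script that execs the given command with each
+// argument single-quoted, e.g. {"crunch-run", "--runtime-engine=docker", "zzzzz-dz642-..."}
+// becomes "#!/bin/sh\nexec 'crunch-run' '--runtime-engine=docker' 'zzzzz-dz642-...'\n".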
+func execScript(args []string) []byte {
+ s := "#!/bin/sh\nexec"
+ for _, w := range args {
+ s += ` '`
+ s += strings.Replace(w, `'`, `'\''`, -1)
+ s += `'`
+ }
+ return []byte(s + "\n")
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "context"
+ "fmt"
+ "math/rand"
+ "os/exec"
+ "strconv"
+ "sync"
+ "testing"
+ "time"
+
+ "git.arvados.org/arvados.git/lib/config"
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "git.arvados.org/arvados.git/sdk/go/arvadostest"
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ "github.com/prometheus/client_golang/prometheus"
+ "gopkg.in/check.v1"
+)
+
+func Test(t *testing.T) {
+ check.TestingT(t)
+}
+
+var _ = check.Suite(&suite{})
+
+type suite struct {
+ disp *dispatcher
+}
+
+func (s *suite) TearDownTest(c *check.C) {
+ arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
+}
+
+func (s *suite) SetUpTest(c *check.C) {
+ cfg, err := config.NewLoader(nil, ctxlog.TestLogger(c)).Load()
+ c.Assert(err, check.IsNil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, check.IsNil)
+ cluster.Containers.CloudVMs.PollInterval = arvados.Duration(time.Second)
+ s.disp = newHandler(context.Background(), cluster, arvadostest.Dispatch1Token, prometheus.NewRegistry()).(*dispatcher)
+ s.disp.lsfcli.stubCommand = func(string, ...string) *exec.Cmd {
+ return exec.Command("bash", "-c", "echo >&2 unimplemented stub; false")
+ }
+}
+
+type lsfstub struct {
+ sudoUser string
+ errorRate float64
+}
+
+func (stub lsfstub) stubCommand(c *check.C) func(prog string, args ...string) *exec.Cmd {
+ mtx := sync.Mutex{}
+ nextjobid := 100
+ fakejobq := map[int]string{}
+ return func(prog string, args ...string) *exec.Cmd {
+ c.Logf("stubCommand: %q %q", prog, args)
+ if rand.Float64() < stub.errorRate {
+ return exec.Command("bash", "-c", "echo >&2 'stub random failure' && false")
+ }
+ if stub.sudoUser != "" && len(args) > 3 &&
+ prog == "sudo" &&
+ args[0] == "-E" &&
+ args[1] == "-u" &&
+ args[2] == stub.sudoUser {
+ prog, args = args[3], args[4:]
+ }
+ switch prog {
+ case "bsub":
+ c.Assert(args, check.HasLen, 4)
+ c.Check(args[0], check.Equals, "-J")
+ switch args[1] {
+ case arvadostest.LockedContainerUUID:
+ c.Check(args, check.DeepEquals, []string{"-J", arvadostest.LockedContainerUUID, "-R", "rusage[mem=11701MB:tmp=0MB] affinity[core(4)]"})
+ mtx.Lock()
+ fakejobq[nextjobid] = args[1]
+ nextjobid++
+ mtx.Unlock()
+ case arvadostest.QueuedContainerUUID:
+ c.Check(args, check.DeepEquals, []string{"-J", arvadostest.QueuedContainerUUID, "-R", "rusage[mem=11701MB:tmp=45777MB] affinity[core(4)]"})
+ mtx.Lock()
+ fakejobq[nextjobid] = args[1]
+ nextjobid++
+ mtx.Unlock()
+ default:
+ c.Errorf("unexpected uuid passed to bsub: args %q", args)
+ return exec.Command("false")
+ }
+ return exec.Command("echo", "submitted job")
+ case "bjobs":
+ c.Check(args, check.DeepEquals, []string{"-u", "all", "-noheader", "-o", "jobid stat job_name:30"})
+ out := ""
+ for jobid, uuid := range fakejobq {
+ out += fmt.Sprintf(`%d %s %s\n`, jobid, "RUN", uuid)
+ }
+ c.Logf("bjobs out: %q", out)
+ return exec.Command("printf", out)
+ case "bkill":
+ killid, _ := strconv.Atoi(args[0])
+ if uuid, ok := fakejobq[killid]; !ok {
+ return exec.Command("bash", "-c", fmt.Sprintf("printf >&2 'Job <%d>: No matching job found\n'", killid))
+ } else if uuid == "" {
+ return exec.Command("bash", "-c", fmt.Sprintf("printf >&2 'Job <%d>: Job has already finished\n'", killid))
+ } else {
+ go func() {
+ time.Sleep(time.Millisecond)
+ mtx.Lock()
+ delete(fakejobq, killid)
+ mtx.Unlock()
+ }()
+ return exec.Command("bash", "-c", fmt.Sprintf("printf 'Job <%d> is being terminated\n'", killid))
+ }
+ default:
+ return exec.Command("bash", "-c", fmt.Sprintf("echo >&2 'stub: command not found: %+q'", prog))
+ }
+ }
+}
+
+func (s *suite) TestSubmit(c *check.C) {
+ s.disp.lsfcli.stubCommand = lsfstub{
+ errorRate: 0.1,
+ sudoUser: s.disp.Cluster.Containers.LSF.BsubSudoUser,
+ }.stubCommand(c)
+ s.disp.Start()
+ deadline := time.Now().Add(20 * time.Second)
+ for range time.NewTicker(time.Second).C {
+ if time.Now().After(deadline) {
+ c.Error("timed out")
+ break
+ }
+ // "queuedcontainer" should be running
+ if _, ok := s.disp.lsfqueue.JobID(arvadostest.QueuedContainerUUID); !ok {
+ continue
+ }
+ // "lockedcontainer" should be cancelled because it
+ // has priority 0 (no matching container requests)
+ if _, ok := s.disp.lsfqueue.JobID(arvadostest.LockedContainerUUID); ok {
+ continue
+ }
+ var ctr arvados.Container
+ if err := s.disp.arvDispatcher.Arv.Get("containers", arvadostest.LockedContainerUUID, nil, &ctr); err != nil {
+ c.Logf("error getting container state for %s: %s", arvadostest.LockedContainerUUID, err)
+ continue
+ }
+ if ctr.State != arvados.ContainerStateQueued {
+ c.Logf("LockedContainer is not in the LSF queue but its arvados record has not been updated to state==Queued (state is %q)", ctr.State)
+ continue
+ }
+ c.Log("reached desired state")
+ break
+ }
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "os/exec"
+ "strings"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+ "github.com/sirupsen/logrus"
+)
+
+type bjobsEntry struct {
+ id int
+ name string
+ stat string
+}
+
+type lsfcli struct {
+ logger logrus.FieldLogger
+ // (for testing) if non-nil, call stubCommand() instead of
+ // exec.Command() when running lsf command line programs.
+ stubCommand func(string, ...string) *exec.Cmd
+}
+
+func (cli lsfcli) command(prog string, args ...string) *exec.Cmd {
+ if f := cli.stubCommand; f != nil {
+ return f(prog, args...)
+ } else {
+ return exec.Command(prog, args...)
+ }
+}
+
+func (cli lsfcli) Bsub(script []byte, args []string, arv *arvados.Client) error {
+ cli.logger.Infof("bsub command %q script %q", args, script)
+ cmd := cli.command(args[0], args[1:]...)
+ cmd.Env = append([]string(nil), os.Environ()...)
+ cmd.Env = append(cmd.Env, "ARVADOS_API_HOST="+arv.APIHost)
+ cmd.Env = append(cmd.Env, "ARVADOS_API_TOKEN="+arv.AuthToken)
+ if arv.Insecure {
+ cmd.Env = append(cmd.Env, "ARVADOS_API_HOST_INSECURE=1")
+ }
+ cmd.Stdin = bytes.NewReader(script)
+ out, err := cmd.Output()
+ cli.logger.WithField("stdout", string(out)).Infof("bsub finished")
+ return errWithStderr(err)
+}
+
+func (cli lsfcli) Bjobs() ([]bjobsEntry, error) {
+ cli.logger.Debugf("Bjobs()")
+ cmd := cli.command("bjobs", "-u", "all", "-noheader", "-o", "jobid stat job_name:30")
+ buf, err := cmd.Output()
+ if err != nil {
+ return nil, errWithStderr(err)
+ }
+ var bjobs []bjobsEntry
+ for _, line := range strings.Split(string(buf), "\n") {
+ if line == "" {
+ continue
+ }
+ var ent bjobsEntry
+ if _, err := fmt.Sscan(line, &ent.id, &ent.stat, &ent.name); err != nil {
+ cli.logger.Warnf("ignoring unparsed line in bjobs output: %q", line)
+ continue
+ }
+ bjobs = append(bjobs, ent)
+ }
+ return bjobs, nil
+}
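+
+// Each bjobs output line is expected to look something like
+//
+//	12345 RUN zzzzz-dz642-queuedcontainer
+//
+// i.e. numeric job ID, status, then the job name, which the dispatcher
+// sets to the container UUID when it submits the job with "bsub -J".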
+
+func (cli lsfcli) Bkill(id int) error {
+ cli.logger.Infof("Bkill(%d)", id)
+ cmd := cli.command("bkill", fmt.Sprintf("%d", id))
+ buf, err := cmd.CombinedOutput()
+ if err == nil || strings.Contains(string(buf), "already finished") {
+ return nil
+ } else {
+ return fmt.Errorf("%s (%q)", err, buf)
+ }
+}
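+
+// Note that a "Job <N>: Job has already finished" message on bkill's
+// combined output is treated as success even if bkill exits nonzero, so
+// cancelling a job that has already gone away is harmless.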
+
+func errWithStderr(err error) error {
+ if err, ok := err.(*exec.ExitError); ok {
+ return fmt.Errorf("%s (%q)", err, err.Stderr)
+ }
+ return err
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package lsf
+
+import (
+ "sync"
+ "time"
+
+ "github.com/sirupsen/logrus"
+)
+
+type lsfqueue struct {
+ logger logrus.FieldLogger
+ period time.Duration
+ lsfcli *lsfcli
+
+ initOnce sync.Once
+ mutex sync.Mutex
+ nextReady chan (<-chan struct{})
+ updated *sync.Cond
+ latest map[string]bjobsEntry
+}
+
+// JobID waits for the next queue update (so even a job that was only
+// submitted a nanosecond ago will show up) and then returns the LSF
+// job ID corresponding to the given container UUID.
+func (q *lsfqueue) JobID(uuid string) (int, bool) {
+ ent, ok := q.getNext()[uuid]
+ return ent.id, ok
+}
+
+// All waits for the next queue update, then returns the names of all
+// jobs in the queue. Used by checkLsfQueueForOrphans().
+func (q *lsfqueue) All() []string {
+ latest := q.getNext()
+ names := make([]string, 0, len(latest))
+ for name := range latest {
+ names = append(names, name)
+ }
+ return names
+}
+
+func (q *lsfqueue) SetPriority(uuid string, priority int64) {
+ q.initOnce.Do(q.init)
+ q.logger.Debug("SetPriority is not implemented")
+}
+
+func (q *lsfqueue) getNext() map[string]bjobsEntry {
+ q.initOnce.Do(q.init)
+ <-(<-q.nextReady)
+ q.mutex.Lock()
+ defer q.mutex.Unlock()
+ return q.latest
+}
+
+func (q *lsfqueue) init() {
+ q.updated = sync.NewCond(&q.mutex)
+ q.nextReady = make(chan (<-chan struct{}))
+ ticker := time.NewTicker(time.Second)
+ go func() {
+ for range ticker.C {
+ // Send a new "next update ready" channel to
+ // the next goroutine that wants one (and any
+ // others that have already queued up since
+ // the first one started waiting).
+ //
+ // Below, when we get a new update, we'll
+ // signal that to the other goroutines by
+ // closing the ready chan.
+ ready := make(chan struct{})
+ q.nextReady <- ready
+ for {
+ select {
+ case q.nextReady <- ready:
+ continue
+ default:
+ }
+ break
+ }
+ // Run bjobs repeatedly if needed, until we
+ // get valid output.
+ var ents []bjobsEntry
+ for {
+ q.logger.Debug("running bjobs")
+ var err error
+ ents, err = q.lsfcli.Bjobs()
+ if err == nil {
+ break
+ }
+ q.logger.Warnf("bjobs: %s", err)
+ <-ticker.C
+ }
+ next := make(map[string]bjobsEntry, len(ents))
+ for _, ent := range ents {
+ next[ent.name] = ent
+ }
+ // Replace q.latest and notify all the
+ // goroutines that the "next update" they
+ // asked for is now ready.
+ q.mutex.Lock()
+ q.latest = next
+ q.mutex.Unlock()
+ close(ready)
+ }
+ }()
+}
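+
+// Typical caller pattern (sketch): after submitting a container, the
+// dispatcher polls the queue until the job shows up or the container is
+// finished, e.g.
+//
+//	if jobid, ok := q.JobID(ctr.UUID); ok {
+//		// still in the LSF queue; jobid can be passed to Bkill later
+//	}
+//
+// where q is the dispatcher's *lsfqueue and ctr is an arvados.Container.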
fpm_depends+=(nodejs)
case "$TARGET" in
- ubuntu1604)
- fpm_depends+=(libcurl3-gnutls)
- ;;
debian* | ubuntu*)
fpm_depends+=(libcurl3-gnutls python3-distutils)
;;
Composer Service
Controller Service
DispatchCloud Service
+ DispatchLSF Service
GitHTTP Service
GitSSH Service
Health Service
AssignNodeHostname string
}
}
+ LSF struct {
+ BsubSudoUser string
+ BsubArgumentsList []string
+ }
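+ // In the cluster config file these would appear under Containers,
+ // e.g. (illustrative values only):
+ //
+ //	Containers:
+ //	  LSF:
+ //	    BsubSudoUser: crunch
+ //	    BsubArgumentsList: []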
}
type CloudVMsConfig struct {
ServiceNameRailsAPI ServiceName = "arvados-api-server"
ServiceNameController ServiceName = "arvados-controller"
ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
+ ServiceNameDispatchLSF ServiceName = "arvados-dispatch-lsf"
ServiceNameHealth ServiceName = "arvados-health"
ServiceNameWorkbench1 ServiceName = "arvados-workbench1"
ServiceNameWorkbench2 ServiceName = "arvados-workbench2"
ServiceNameRailsAPI: svcs.RailsAPI,
ServiceNameController: svcs.Controller,
ServiceNameDispatchCloud: svcs.DispatchCloud,
+ ServiceNameDispatchLSF: svcs.DispatchLSF,
ServiceNameHealth: svcs.Health,
ServiceNameWorkbench1: svcs.Workbench1,
ServiceNameWorkbench2: svcs.Workbench2,
GatewayAddress string `json:"gateway_address"`
InteractiveSessionStarted bool `json:"interactive_session_started"`
OutputStorageClasses []string `json:"output_storage_classes"`
+ RuntimeUserUUID string `json:"runtime_user_uuid"`
+ RuntimeAuthScopes []string `json:"runtime_auth_scopes"`
+ RuntimeToken string `json:"runtime_token"`
}
// ContainerRequest is an arvados#container_request resource.
QueuedContainerRequestUUID = "zzzzz-xvhdp-cr4queuedcontnr"
QueuedContainerUUID = "zzzzz-dz642-queuedcontainer"
+ LockedContainerUUID = "zzzzz-dz642-lockedcontainer"
+
RunningContainerUUID = "zzzzz-dz642-runningcontainr"
CompletedContainerUUID = "zzzzz-dz642-compltcontainer"
package dispatch
import (
+ "bytes"
"context"
"fmt"
"sync"
"time"
+ "git.arvados.org/arvados.git/lib/dispatchcloud"
"git.arvados.org/arvados.git/sdk/go/arvados"
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"github.com/sirupsen/logrus"
// running, and return.
//
// The DispatchFunc should not return until the container is finished.
-type DispatchFunc func(*Dispatcher, arvados.Container, <-chan arvados.Container)
+type DispatchFunc func(*Dispatcher, arvados.Container, <-chan arvados.Container) error
// Run watches the API server's queue for containers that are either
// ready to run and available to lock, or are already locked by this
}
tracker.updates <- c
go func() {
- d.RunContainer(d, c, tracker.updates)
- // RunContainer blocks for the lifetime of the container. When
- // it returns, the tracker should delete itself.
+ err := d.RunContainer(d, c, tracker.updates)
+ if err != nil {
+ text := fmt.Sprintf("Error running container %s: %s", c.UUID, err)
+ if err, ok := err.(dispatchcloud.ConstraintsNotSatisfiableError); ok {
+ var logBuf bytes.Buffer
+ fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", c.UUID, err)
+ if len(err.AvailableTypes) == 0 {
+ fmt.Fprint(&logBuf, "No instance types are configured.\n")
+ } else {
+ fmt.Fprint(&logBuf, "Available instance types:\n")
+ for _, t := range err.AvailableTypes {
+ fmt.Fprintf(&logBuf,
+ "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
+ t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price)
+ }
+ }
+ text = logBuf.String()
+ d.UpdateState(c.UUID, Cancelled)
+ }
+ d.Logger.Printf("%s", text)
+ lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+ "object_uuid": c.UUID,
+ "event_type": "dispatch",
+ "properties": map[string]string{"text": text}}}
+ d.Arv.Create("logs", lr, nil)
+ d.Unlock(c.UUID)
+ }
+
d.mtx.Lock()
delete(d.trackers, c.UUID)
d.mtx.Unlock()
time.AfterFunc(10*time.Second, func() { done <- false })
d := &Dispatcher{
Arv: arv,
- RunContainer: func(dsp *Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ RunContainer: func(dsp *Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
for ctr := range status {
c.Logf("%#v", ctr)
}
done <- true
+ return nil
},
}
d.TrackContainer(arvadostest.QueuedContainerUUID)
for _, svc := range []*arvados.Service{
&svcs.Controller,
&svcs.DispatchCloud,
+ &svcs.DispatchLSF,
&svcs.Keepbalance,
&svcs.Keepproxy,
&svcs.Keepstore,
}
req.Header.Set(HeaderRequestID, gen.Next())
}
+ w.Header().Set("X-Request-Id", req.Header.Get("X-Request-Id"))
h.ServeHTTP(w, req)
})
}
multipleResponseError
}
-type InsufficientReplicasError error
+type InsufficientReplicasError struct{ error }
-type OversizeBlockError error
+type OversizeBlockError struct{ error }
-var ErrOversizeBlock = OversizeBlockError(errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")"))
+var ErrOversizeBlock = OversizeBlockError{error: errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")")}
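+
+// Wrapping the underlying error in a struct (rather than declaring these
+// types as plain aliases of the error interface) lets callers assert on
+// the concrete type with a zero value, e.g. something like
+//
+//	c.Check(err, FitsTypeOf, InsufficientReplicasError{})
+//
+// without constructing a throwaway errors.New("") value first.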
var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
var InvalidLocatorError = errors.New("Invalid locator")
"bytes"
"context"
"crypto/md5"
- "errors"
"fmt"
"io"
"io/ioutil"
_, replicas, err := kc.PutB([]byte("foo"))
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 1)
c.Check(<-st.handled, Equals, ks1[0].url)
}
_, replicas, err := kc.PutB([]byte("foo"))
<-st.handled
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 2)
}
_, replicas, err := kc.PutB([]byte("foo"))
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 1)
c.Check(<-st.handled, Equals, localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", 0)])
_, replicas, err := kc.PutB([]byte("foo"))
- c.Check(err, FitsTypeOf, InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, InsufficientReplicasError{})
c.Check(replicas, Equals, 0)
}
msg += resp + "; "
}
msg = msg[:len(msg)-2]
- return resp, InsufficientReplicasError(errors.New(msg))
+ return resp, InsufficientReplicasError{error: errors.New(msg)}
}
break
}
end
err[:errors] ||= args
err[:errors].map! do |err|
- err += " (" + Thread.current[:request_id] + ")"
+ err += " (#{request.request_id})"
end
err[:error_token] = [Time.now.utc.to_i, "%08x" % rand(16 ** 8)].join("+")
status = err.delete(:status) || 422
end
def set_current_request_id
- req_id = request.headers['X-Request-Id']
- if !req_id || req_id.length < 1 || req_id.length > 1024
- # Client-supplied ID is either missing or too long to be
- # considered friendly.
- req_id = "req-" + Random::DEFAULT.rand(2**128).to_s(36)[0..19]
- end
- response.headers['X-Request-Id'] = Thread.current[:request_id] = req_id
- Rails.logger.tagged(req_id) do
+ Rails.logger.tagged(request.request_id) do
yield
end
- Thread.current[:request_id] = nil
end
def append_info_to_payload(payload)
# already know how to properly treat them.
attribute :secret_mounts, :jsonbHash, default: {}
attribute :runtime_status, :jsonbHash, default: {}
- attribute :runtime_auth_scopes, :jsonbHash, default: {}
+ attribute :runtime_auth_scopes, :jsonbArray, default: []
attribute :output_storage_classes, :jsonbArray, default: ["default"]
serialize :environment, Hash
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+module CustomRequestId
+ def make_request_id(req_id)
+ if !req_id || req_id.length < 1 || req_id.length > 1024
+ # Client-supplied ID is either missing or too long to be
+ # considered friendly.
+ internal_request_id
+ else
+ req_id
+ end
+ end
+
+ def internal_request_id
+ "req-" + Random::DEFAULT.rand(2**128).to_s(36)[0..19]
+ end
+end
+
+class ActionDispatch::RequestId
+ # Instead of using the default UUID-like format for X-Request-Id headers,
+ # use our own.
+ prepend CustomRequestId
+end
\ No newline at end of file
token_time = token.split('+', 2).first.to_i
assert_operator(token_time, :>=, @start_stamp, "error token too old")
assert_operator(token_time, :<=, now_timestamp, "error token too new")
- json_response['errors'].each do |err|
- assert_match(/req-[a-z0-9]{20}/, err, "X-Request-Id value missing on error message")
- end
end
def check_404(errmsg="Path not found")
check_error_token
end
- test "X-Request-Id header" do
- authorize_with :spectator
- get(:index)
- assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
- end
-
- # The response header is the one that gets logged, so this test also
- # ensures we log the ID supplied in the request, if any.
- test "X-Request-Id given by client" do
- authorize_with :spectator
- @request.headers['X-Request-Id'] = 'abcdefG'
- get(:index)
- assert_equal 'abcdefG', response.headers['X-Request-Id']
- end
-
- test "X-Request-Id given by client is ignored if too long" do
- authorize_with :spectator
- @request.headers['X-Request-Id'] = 'abcdefG' * 1000
- get(:index)
- assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
- end
-
['foo', '', 'FALSE', 'TRUE', nil, [true], {a:true}, '"true"'].each do |bogus|
test "bogus boolean parameter #{bogus.inspect} returns error" do
@controller = Arvados::V1::GroupsController.new
assert_nil assigns(:object)
assert_not_nil json_response['errors']
assert_response 404
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
end
end
"Unexpected new route: #{route.path.spec}")
end
end
+
+ test "X-Request-Id header" do
+ get "/", headers: auth(:spectator)
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
+ end
+
+ test "X-Request-Id header on non-existant object URL" do
+ get "/arvados/v1/container_requests/invalid",
+ params: {:format => :json}, headers: auth(:active)
+ assert_response 404
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
+ end
+
+ # The response header is the one that gets logged, so this test also
+ # ensures we log the ID supplied in the request, if any.
+ test "X-Request-Id given by client" do
+ get "/", headers: auth(:spectator).merge({'X-Request-Id': 'abcdefG'})
+ assert_equal 'abcdefG', response.headers['X-Request-Id']
+ end
+
+ test "X-Request-Id given by client is ignored if too long" do
+ long_req_id = 'abcdefG' * 1000
+ get "/", headers: auth(:spectator).merge({'X-Request-Id': long_req_id})
+ assert_match /^req-[0-9a-zA-Z]{20}$/, response.headers['X-Request-Id']
+ end
end
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
// crunch-run terminates, mark the container as Cancelled.
func (lr *LocalRun) run(dispatcher *dispatch.Dispatcher,
container arvados.Container,
- status <-chan arvados.Container) {
+ status <-chan arvados.Container) error {
uuid := container.UUID
case lr.concurrencyLimit <- true:
break
case <-lr.ctx.Done():
- return
+ return lr.ctx.Err()
}
defer func() { <-lr.concurrencyLimit }()
}
dispatcher.Logger.Printf("finalized container %v", uuid)
+ return nil
}
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
- dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
- (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
- cancel()
+ dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) error {
+ defer cancel()
+ return (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
}
err = dispatcher.Run(ctx)
cl := arvados.Cluster{Containers: arvados.ContainersConfig{RuntimeEngine: "docker"}}
- dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) {
- (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
- cancel()
+ dispatcher.RunContainer = func(d *dispatch.Dispatcher, c arvados.Container, s <-chan arvados.Container) error {
+ defer cancel()
+ return (&LocalRun{startCmd, make(chan bool, 8), ctx, &cl}).run(d, c, s)
}
re := regexp.MustCompile(`(?ms).*` + expected + `.*`)
// Dispatcher service for Crunch that submits containers to the slurm queue.
import (
- "bytes"
"context"
"flag"
"fmt"
// already in the queue). Cancel the slurm job if the container's
// priority changes to zero or its state indicates it's no longer
// running.
-func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+func (disp *Dispatcher) runContainer(_ *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
log.Printf("Submitting container %s to slurm", ctr.UUID)
cmd := []string{disp.cluster.Containers.CrunchRunCommand}
cmd = append(cmd, disp.cluster.Containers.CrunchRunArgumentsList...)
- if err := disp.submit(ctr, cmd); err != nil {
- var text string
- switch err := err.(type) {
- case dispatchcloud.ConstraintsNotSatisfiableError:
- var logBuf bytes.Buffer
- fmt.Fprintf(&logBuf, "cannot run container %s: %s\n", ctr.UUID, err)
- if len(err.AvailableTypes) == 0 {
- fmt.Fprint(&logBuf, "No instance types are configured.\n")
- } else {
- fmt.Fprint(&logBuf, "Available instance types:\n")
- for _, t := range err.AvailableTypes {
- fmt.Fprintf(&logBuf,
- "Type %q: %d VCPUs, %d RAM, %d Scratch, %f Price\n",
- t.Name, t.VCPUs, t.RAM, t.Scratch, t.Price,
- )
- }
- }
- text = logBuf.String()
- disp.UpdateState(ctr.UUID, dispatch.Cancelled)
- default:
- text = fmt.Sprintf("Error submitting container %s to slurm: %s", ctr.UUID, err)
- }
- log.Print(text)
-
- lr := arvadosclient.Dict{"log": arvadosclient.Dict{
- "object_uuid": ctr.UUID,
- "event_type": "dispatch",
- "properties": map[string]string{"text": text}}}
- disp.Arv.Create("logs", lr, nil)
-
- disp.Unlock(ctr.UUID)
- return
+ err := disp.submit(ctr, cmd)
+ if err != nil {
+ return err
}
}
case dispatch.Locked:
disp.Unlock(ctr.UUID)
}
- return
+ return nil
case updated, ok := <-status:
if !ok {
log.Printf("container %s is done: cancel slurm job", ctr.UUID)
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
func (s *IntegrationSuite) integrationTest(c *C,
expectBatch [][]string,
- runContainer func(*dispatch.Dispatcher, arvados.Container)) arvados.Container {
+ runContainer func(*dispatch.Dispatcher, arvados.Container)) (arvados.Container, error) {
arvadostest.ResetEnv()
arv, err := arvadosclient.MakeArvadosClient()
ctx, cancel := context.WithCancel(context.Background())
doneRun := make(chan struct{})
+ doneDispatch := make(chan error)
s.disp.Dispatcher = &dispatch.Dispatcher{
Arv: arv,
PollPeriod: time.Second,
- RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
go func() {
runContainer(disp, ctr)
s.slurm.queue = ""
doneRun <- struct{}{}
}()
- s.disp.runContainer(disp, ctr, status)
+ err := s.disp.runContainer(disp, ctr, status)
cancel()
+ doneDispatch <- err
+ return nil
},
}
err = s.disp.Dispatcher.Run(ctx)
<-doneRun
c.Assert(err, Equals, context.Canceled)
+ errDispatch := <-doneDispatch
s.disp.sqCheck.Stop()
var container arvados.Container
err = arv.Get("containers", "zzzzz-dz642-queuedcontainer", nil, &container)
c.Check(err, IsNil)
- return container
+ return container, errDispatch
}
func (s *IntegrationSuite) TestNormal(c *C) {
s.slurm = slurmFake{queue: "zzzzz-dz642-queuedcontainer 10000 100 PENDING Resources\n"}
- container := s.integrationTest(c,
+ container, _ := s.integrationTest(c,
nil,
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
s.slurm = slurmFake{queue: "zzzzz-dz642-queuedcontainer 10000 100 PENDING Resources\n"}
readyToCancel := make(chan bool)
s.slurm.onCancel = func() { <-readyToCancel }
- container := s.integrationTest(c,
+ container, _ := s.integrationTest(c,
nil,
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
}
func (s *IntegrationSuite) TestMissingFromSqueue(c *C) {
- container := s.integrationTest(c,
+ container, _ := s.integrationTest(c,
[][]string{{
fmt.Sprintf("--job-name=%s", "zzzzz-dz642-queuedcontainer"),
fmt.Sprintf("--nice=%d", 10000),
func (s *IntegrationSuite) TestSbatchFail(c *C) {
s.slurm = slurmFake{errBatch: errors.New("something terrible happened")}
- container := s.integrationTest(c,
+ container, err := s.integrationTest(c,
[][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--nice=10000", "--no-requeue", "--mem=11445", "--cpus-per-task=4", "--tmp=45777"}},
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
dispatcher.UpdateState(container.UUID, dispatch.Complete)
})
c.Check(container.State, Equals, arvados.ContainerStateComplete)
-
- arv, err := arvadosclient.MakeArvadosClient()
- c.Assert(err, IsNil)
-
- var ll arvados.LogList
- err = arv.List("logs", arvadosclient.Dict{"filters": [][]string{
- {"object_uuid", "=", container.UUID},
- {"event_type", "=", "dispatch"},
- }}, &ll)
- c.Assert(err, IsNil)
- c.Assert(len(ll.Items), Equals, 1)
+ c.Check(err, ErrorMatches, `something terrible happened`)
}
type StubbedSuite struct {
dispatcher := dispatch.Dispatcher{
Arv: arv,
PollPeriod: time.Second,
- RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) {
+ RunContainer: func(disp *dispatch.Dispatcher, ctr arvados.Container, status <-chan arvados.Container) error {
go func() {
time.Sleep(time.Second)
disp.UpdateState(ctr.UUID, dispatch.Running)
}()
s.disp.runContainer(disp, ctr, status)
cancel()
+ return nil
},
}
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
# SPDX-License-Identifier: Apache-2.0
case "$TARGET" in
- ubuntu1604)
- fpm_depends+=()
- ;;
debian* | ubuntu*)
fpm_depends+=(python3-distutils)
;;
After=network.target
AssertPathExists=/etc/arvados/config.yml
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
import (
"bytes"
"crypto/md5"
- "errors"
"fmt"
"io/ioutil"
"math/rand"
content := []byte("TestDesiredReplicas")
hash := fmt.Sprintf("%x", md5.Sum(content))
- for _, kc.Want_replicas = range []int{0, 1, 2} {
+ for _, kc.Want_replicas = range []int{0, 1, 2, 3} {
locator, rep, err := kc.PutB(content)
- c.Check(err, Equals, nil)
- c.Check(rep, Equals, kc.Want_replicas)
- if rep > 0 {
- c.Check(locator, Matches, fmt.Sprintf(`^%s\+%d(\+.+)?$`, hash, len(content)))
+ if kc.Want_replicas < 3 {
+ c.Check(err, Equals, nil)
+ c.Check(rep, Equals, kc.Want_replicas)
+ if rep > 0 {
+ c.Check(locator, Matches, fmt.Sprintf(`^%s\+%d(\+.+)?$`, hash, len(content)))
+ }
+ } else {
+ c.Check(err, ErrorMatches, ".*503.*")
}
}
}
hash2, rep, err := kc.PutB([]byte("bar"))
c.Check(hash2, Equals, "")
c.Check(rep, Equals, 0)
- c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError{})
blocklen, _, err := kc.Ask(hash)
c.Check(err, FitsTypeOf, &keepclient.ErrNotFound{})
} else {
c.Check(hash2, Equals, "")
c.Check(rep, Equals, 0)
- c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError(errors.New("")))
+ c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError{})
}
logbuf.Reset()
}
Documentation=https://doc.arvados.org/
After=network.target
-# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
-StartLimitInterval=0
-
# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
StartLimitIntervalSec=0
if existing_groups.index(addgroup).nil?
# User should be in group, but isn't, so add them.
STDERR.puts "Add user #{username} to #{addgroup} group"
- system("adduser", username, addgroup)
+ system("usermod", "-aG", addgroup, username)
end
end
if groups.index(removegroup).nil?
# User is in a group, but shouldn't be, so remove them.
STDERR.puts "Remove user #{username} from #{removegroup} group"
- system("deluser", username, removegroup)
+ system("gpasswd", "-d", username, removegroup)
end
end
ProviderType: t3.small
VCPUs: 2
RAM: 2GiB
- IncludedScratch: 50GB
AddedScratch: 50GB
Price: 0.0208
c5large:
ProviderType: c5.large
VCPUs: 2
RAM: 4GiB
- IncludedScratch: 50GB
AddedScratch: 50GB
Price: 0.085
m5large:
ProviderType: m5.large
VCPUs: 2
RAM: 8GiB
- IncludedScratch: 50GB
AddedScratch: 50GB
Price: 0.096
c5xlarge:
ProviderType: c5.xlarge
VCPUs: 4
RAM: 8GiB
- IncludedScratch: 100GB
AddedScratch: 100GB
Price: 0.17
m5xlarge:
ProviderType: m5.xlarge
VCPUs: 4
RAM: 16GiB
- IncludedScratch: 100GB
AddedScratch: 100GB
Price: 0.192
m5xlarge_extradisk:
ProviderType: m5.xlarge
VCPUs: 4
RAM: 16GiB
- IncludedScratch: 400GB
AddedScratch: 400GB
Price: 0.193
c52xlarge:
ProviderType: c5.2xlarge
VCPUs: 8
RAM: 16GiB
- IncludedScratch: 200GB
AddedScratch: 200GB
Price: 0.34
m52xlarge:
ProviderType: m5.2xlarge
VCPUs: 8
RAM: 32GiB
- IncludedScratch: 200GB
AddedScratch: 200GB
Price: 0.384
c54xlarge:
ProviderType: c5.4xlarge
VCPUs: 16
RAM: 32GiB
- IncludedScratch: 400GB
AddedScratch: 400GB
Price: 0.68
m54xlarge:
ProviderType: m5.4xlarge
VCPUs: 16
RAM: 64GiB
- IncludedScratch: 400GB
AddedScratch: 400GB
Price: 0.768