Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <ldipentima@veritasgenetics.com>
doc/fonts/*
doc/_includes/_config_default_yml.liquid
doc/user/cwl/federated/*
+doc/_includes/_federated_cwl.liquid
*/docker_image
docker/jobs/apt.arvados.org*.list
docker/jobs/1078ECD7.key
--- /dev/null
+<%# Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: AGPL-3.0 %>
+
+<div class="compute-summary-numbers">
+ <table>
+ <colgroup>
+ <col width="50%">
+ <col width="50%">
+ </colgroup>
+ <tr>
+ <th>Pending containers</th>
+ <th>Running containers</th>
+ </tr>
+ <tr>
+ <% pending_containers = Container.order("created_at asc").filter([["state", "in", ["Queued", "Locked"]], ["priority", ">", 0]]).limit(1) %>
+ <% running_containers = Container.order("started_at asc").where(state: "Running").limit(1) %>
+ <td><%= pending_containers.items_available %></td>
+ <td><%= running_containers.items_available %></td>
+ </tr>
+ <tr>
+ <th>Oldest pending</th>
+ <th>Longest running</th>
+ </tr>
+ <tr>
+ <td><% if pending_containers.first then %>
+ <%= link_to_if_arvados_object pending_containers.first, link_text: render_runtime(Time.now - pending_containers.first.created_at, false, false) %>
+ <% else %>
+ -
+ <% end %>
+ </td>
+
+ <td><% if running_containers.first then %>
+ <%= link_to_if_arvados_object running_containers.first, link_text: render_runtime(Time.now - running_containers.first.created_at, false, false) %>
+ <% else %>
+ -
+ <% end %>
+ </td>
+ </tr>
+ </table>
+
+</div>
</div>
</div>
</div>
+ <% end %>
+ <% if Container.api_exists?(:index) %>
+ <div class="panel panel-default" style="min-height: 10.5em">
+ <div class="panel-heading"><span class="panel-title">Container status</span></div>
+ <div class="panel-body containers-summary-pane">
+ <div>
+ <%= render partial: 'container_summary' %>
+ </div>
+ </div>
+ </div>
<% end %>
<% if Rails.configuration.show_recent_collections_on_dashboard %>
<div class="panel panel-default">
ln -vsfT "$WORKSPACE" "$GOPATH/src/git.curoverse.com/arvados.git"
go get -v github.com/kardianos/govendor
cd "$GOPATH/src/git.curoverse.com/arvados.git"
- if [[ -n "$short" ]]; then
- go get -v -d ...
- "$GOPATH/bin/govendor" sync
- else
- # Remove cached source dirs in workdir. Otherwise, they will
- # not qualify as +missing or +external below, and we won't be
- # able to detect that they're missing from vendor/vendor.json.
- rm -rf vendor/*/
- go get -v -d ...
- "$GOPATH/bin/govendor" sync
- [[ -z $("$GOPATH/bin/govendor" list +unused +missing +external | tee /dev/stderr) ]] \
- || fatal "vendor/vendor.json has unused or missing dependencies -- try:
-
-(export GOPATH=\"${GOPATH}\"; cd \$GOPATH/src/git.curoverse.com/arvados.git && \$GOPATH/bin/govendor add +missing +external && \$GOPATH/bin/govendor remove +unused)
-
-";
- fi
+ go get -v -d ...
+ "$GOPATH/bin/govendor" sync
) || fatal "Go setup failed"
setup_virtualenv "$VENVDIR" --python python2.7
services/api)
stop_services
;;
- gofmt | doc | lib/cli | lib/cloud/azure | lib/cloud/ec2 | lib/cmd | lib/dispatchcloud/ssh_executor | lib/dispatchcloud/worker)
+ gofmt | govendor | doc | lib/cli | lib/cloud/azure | lib/cloud/ec2 | lib/cmd | lib/dispatchcloud/ssh_executor | lib/dispatchcloud/worker)
# don't care whether services are running
;;
*)
[[ -z "$(gofmt -e -d $dirs | tee -a /dev/stderr)" ]]
}
+test_govendor() {
+ (
+ set -e
+ cd "$GOPATH/src/git.curoverse.com/arvados.git"
+ # Remove cached source dirs in workdir. Otherwise, they will
+ # not qualify as +missing or +external below, and we won't be
+ # able to detect that they're missing from vendor/vendor.json.
+ rm -rf vendor/*/
+ go get -v -d ...
+ "$GOPATH/bin/govendor" sync
+ if [[ -n $("$GOPATH/bin/govendor" list +unused +missing +external | tee /dev/stderr) ]]; then
+ echo >&2 "vendor/vendor.json has unused or missing dependencies -- try:
+
+(export GOPATH=\"${GOPATH}\"; cd \$GOPATH/src/git.curoverse.com/arvados.git && \$GOPATH/bin/govendor add +missing +external && \$GOPATH/bin/govendor remove +unused)
+
+"
+ return 1
+ fi
+ )
+}
+
test_services/api() {
rm -f "$WORKSPACE/services/api/git-commit.version"
cd "$WORKSPACE/services/api" \
fi
do_test gofmt
+ do_test govendor
do_test doc
do_test sdk/ruby
do_test sdk/R
# $ rake generate baseurl=/example arvados_api_host=example.com
baseurl:
+current_version:
+all_versions:
arvados_api_host: localhost
arvados_cluster_uuid: local
arvados_workbench_host: http://localhost
-../user/cwl/federated/federated.cwl
\ No newline at end of file
+../user/cwl/federated/feddemo.cwl
\ No newline at end of file
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-# Start a shell for the postgres user:
- <notextile><pre>~$ <span class="userinput">sudo -u postgres bash</span></pre></notextile>
-# Generate a new database password:
- <notextile><pre>$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+<ol>
+<li>Start a shell for the postgres user:
+<notextile><pre>~$ <span class="userinput">sudo -u postgres bash</span></pre></notextile>
+</li>
+<li>Generate a new database password:
+<notextile><pre>$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
yourgeneratedpassword
</pre></notextile> Record this. You'll need it when you set up the Rails server later.
-# Create a database user with the password you generated:
+</li>
+<li>Create a database user with the password you generated:
<notextile><pre><code>$ <span class="userinput">createuser --encrypted -R -S --pwprompt {{service_role}}</span>
-Enter password for new role: <span class="userinput">yourgeneratedpassword</span>
-Enter it again: <span class="userinput">yourgeneratedpassword</span>
-</code></pre></notextile>
-# Create a database owned by the new user:
+ Enter password for new role: <span class="userinput">yourgeneratedpassword</span>
+ Enter it again: <span class="userinput">yourgeneratedpassword</span></code></pre></notextile>
+</li>
+<li>Create a database owned by the new user:
<notextile><pre><code>$ <span class="userinput">createdb {{service_database}} -T template0 -E UTF8 -O {{service_role}}</span></code></pre></notextile>
-# Exit the postgres user shell:
+</li>
+{% if use_contrib %}
+<li>Enable the pg_trgm extension
+ <notextile><pre>$ <span class="userinput">psql {{service_database}} -c "CREATE EXTENSION pg_trgm"</span></pre></notextile>
+</li>
+{% endif %}
+<li>Exit the postgres user shell:
<notextile><pre>$ <span class="userinput">exit</span></pre></notextile>
+</li>
+</ol>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
- <a class="navbar-brand" href="{{ site.baseurl }}/">Arvados™ Docs</a>
+ <a class="navbar-brand" href="{{ site.baseurl }}/">Arvados<sup>™</sup> Docs</a>
</div>
<div class="collapse navbar-collapse" id="bs-navbar-collapse">
<ul class="nav navbar-nav">
<li {% if page.navsection == 'api' %} class="active" {% endif %}><a href="{{ site.baseurl }}/api/index.html">API</a></li>
<li {% if page.navsection == 'admin' %} class="active" {% endif %}><a href="{{ site.baseurl }}/admin/index.html">Admin</a></li>
<li {% if page.navsection == 'installguide' %} class="active" {% endif %}><a href="{{ site.baseurl }}/install/index.html">Install</a></li>
+ <li><a href="#" class="dropdown-toggle" role="button" id="versionMenuLink" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">{{ site.current_version }}{% if site.all_versions != [] %} ▾{% endif %}</a>
+ {% if site.all_versions != [] %}
+ <div class="dropdown-menu" aria-labelledby="versionMenuLink">
+ {% for version in site.all_versions %}
+ <a href="/{{ version }}{{ page.url }}" class="dropdown-item">{{ version }}</a><br/>
+ {% endfor %}
+ </div>
+ {% endif %}
+ </li>
<li><a href="https://arvados.org" style="padding-left: 2em">arvados.org »</a></li>
</ul>
h2. General process
+# Consult upgrade notes below to see if any manual configuration updates are necessary.
# Wait for the cluster to be idle and stop Arvados services.
# Install new packages using @apt-get upgrade@ or @yum upgrade@.
# Package installation scripts will perform any necessary data migrations.
-# Consult upgrade notes below to see if any manual configuration updates are necessary.
# Restart Arvados services.
h2. Upgrade notes
You can test if any records in your database are affected by going to the API server directory and running @bundle exec rake symbols:check@. This will report which records contain fields with a leading ':' that would previously have been stripped. If there are records to be updated, you can update the database using @bundle exec rake symbols:stringify@.
+h4. Enabling Postgres trigram indexes
+
+ Feature "#15106":https://dev.arvados.org/issues/15106 improves the speed and functionality of full text search by introducing trigram indexes on text searchable database columns via a migration. Prior to updating, you must first install the postgresql-contrib package on your system and subsequently run the @CREATE EXTENSION pg_trgm@ SQL command on the arvados_production database as a postgres superuser.
+
h3(#v1_4_0). v1.4.0 (2019-06-05)
h4. Populating the new file_count and file_size_total columns on the collections table
./crunch-run.txt
./stderr.txt
./stdout.txt
-~$ <span class="userinput">arv keep get <b>a01df2f7e5bc1c2ad59c60a837e90dc6+166</b>/stdout.txt</span>
+~$ <span class="userinput">arv-get <b>a01df2f7e5bc1c2ad59c60a837e90dc6+166</b>/stdout.txt</span>
2016-08-05T13:53:06.201011Z Hello, Crunch!
</code></pre>
</notextile>
{"errors":["Forbidden"],"error_token":"1533044555+684b532c"}
</code></pre>
</notextile>
+
+h3(#confirm-config). Confirm the public configuration is OK
+
+Confirm the publicly accessible configuration endpoint does not reveal any sensitive information (e.g., a secret that was mistakenly entered under the wrong configuration key). Use the jq program, if you have installed it, to make the JSON document easier to read.
+
+<notextile>
+<pre><code>~$ <span class="userinput">curl http://0.0.0.0:<b>9004</b>/arvados/v1/config | jq .</span>
+{
+ "API": {
+ "MaxItemsPerResponse": 1000,
+ "MaxRequestAmplification": 4,
+ "RequestTimeout": "5m"
+ },
+ ...
+</code></pre>
+</notextile>
{% include 'note_python_sc' %}
# Install PostgreSQL:
- <notextile><pre>~$ <span class="userinput">sudo yum install rh-postgresql95</span>
+ <notextile><pre>~$ <span class="userinput">sudo yum install rh-postgresql95 rh-postgresql95-postgresql-contrib</span>
~$ <span class="userinput">scl enable rh-postgresql95 bash</span></pre></notextile>
# Initialize the database:
<notextile><pre>~$ <span class="userinput">sudo postgresql-setup initdb</span></pre></notextile>
Ubuntu 14.04 (Trusty) requires an updated PostgreSQL version, see "the PostgreSQL ubuntu repository":https://www.postgresql.org/download/linux/ubuntu/
# Install PostgreSQL:
- <notextile><pre>~$ <span class="userinput">sudo apt-get install postgresql</span></pre></notextile>
+ <notextile><pre>~$ <span class="userinput">sudo apt-get install postgresql postgresql-contrib</span></pre></notextile>
# "Set up Arvados credentials and databases":#rails_setup for the services that will use this PostgreSQL install.
<a name="rails_setup"></a>
{% assign service_role = "arvados_sso" %}
{% assign service_database = "arvados_sso_production" %}
+{% assign use_contrib = false %}
{% include 'install_postgres_database' %}
h2(#api). Set up API server credentials and database
{% assign service_role = "arvados" %}
{% assign service_database = "arvados_production" %}
+{% assign use_contrib = true %}
{% include 'install_postgres_database' %}
h3(#arv-ws). arv ws
+This is a frontend to @arv-ws@.
+
@arv ws@ provides access to the websockets event stream.
<notextile>
h3(#arv-keep). arv keep
-@arv keep@ provides access to the Keep storage service.
+@arv keep@ commands for accessing the Keep storage service.
<notextile>
<pre>
h3(#arv-keep-ls). arv keep ls
+This is a frontend to @arv-ls@.
+
<notextile>
<pre>
$ <code class="userinput">arv keep ls --help</code>
h3(#arv-keep-get). arv keep get
+This is a frontend to @arv-get@.
+
<notextile>
<pre>
$ <code class="userinput">arv keep get --help</code>
h3(#arv-keep-put). arv keep put
+This is a frontend to @arv-put@.
+
<notextile>
<pre>
$ <code class="userinput">arv keep put --help</code>
h3(#arv-pipeline-run). arv pipeline run
+WARNING: this uses the obsolete "job" API. Don't use this. You should use @arvados-cwl-runner@ instead.
+
@arv pipeline run@ can be used to start a pipeline run from the command line.
The User Guide has a page with a bit more information on "using arv pipeline run":{{site.baseurl}}/user/topics/running-pipeline-command-line.html.
h3(#arv-run). arv run
+WARNING: this uses the obsolete "job" API. Don't use this. You should use @arvados-cwl-runner@ instead.
+
The @arv-run@ command creates Arvados pipelines at the command line that fan out to multiple concurrent tasks across Arvados compute nodes.
The User Guide has a page on "using arv-run":{{site.baseurl}}/user/topics/arv-run.html.
This installation method is recommended to make the SDK available for use in your own Python programs. It can coexist with the system-wide installation method from a distribution package (option 2, below).
-Run @pip install arvados-python-client@ in an appropriate installation environment, such as a virtualenv.
+Run @pip install arvados-python-client@ in an appropriate installation environment, such as a @virtualenv@.
+
+The SDK uses @pycurl@ which depends on the @libcurl@ C library. To build the module you may have to install additional packages. On Debian 9 this is:
+
+<pre>
+$ apt-get install git build-essential python3-dev libcurl4-openssl-dev libssl1.0-dev
+</pre>
If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, try @pip install --pre arvados-python-client@.
Run it like any other workflow:
<notextile>
-<pre><code>~$ <span class="userinput">arvados-cwl-runner federated.cwl shards.cwl</span>
+<pre><code>~$ <span class="userinput">arvados-cwl-runner feddemo.cwl shards.cwl</span>
</code></pre>
</notextile>
--- /dev/null
+name: FileOnCluster
+type: record
+fields:
+ file: File
+ cluster: string
\ No newline at end of file
+++ /dev/null
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-cwlVersion: v1.0
-class: CommandLineTool
-inputs:
- inp:
- type: File[]
- inputBinding: {}
-outputs:
- joined: stdout
-stdout: joined.txt
-baseCommand: cat
--- /dev/null
+green
+blue
--- /dev/null
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+ SchemaDefRequirement:
+ types:
+ - $import: FileOnCluster.yml
+inputs:
+ select_column: string
+ select_values: File
+ dataset: 'FileOnCluster.yml#FileOnCluster'
+ extract_py:
+ type: File
+ default:
+ class: File
+ location: extract.py
+outputs:
+ out:
+ type: File
+ outputBinding:
+ glob: extracted.csv
+
+arguments: [python, $(inputs.extract_py), $(inputs.select_column), $(inputs.select_values), $(inputs.dataset.file), $(inputs.dataset.cluster)]
--- /dev/null
+import csv
+import sys
+
+select_column = sys.argv[1]
+select_values = sys.argv[2]
+dataset = sys.argv[3]
+cluster = sys.argv[4]
+
+sv = open(select_values, "rt")
+selectvals = [s.strip() for s in sv]
+
+print("selectvals", selectvals)
+
+ds = csv.reader(open(dataset, "rt"))
+header = next(ds)
+print("header is", header)
+columnindex = None
+for i,v in enumerate(header):
+ if v == select_column:
+ columnindex = i
+if columnindex is None:
+ raise Exception("Column %s not found" % select_column)
+
+print("column index", columnindex)
+
+ex = csv.writer(open("extracted.csv", "wt"))
+ex.writerow(["cluster"]+list(header))
+
+for row in ds:
+ if row[columnindex] in selectvals:
+ ex.writerow([cluster]+list(row))
-#
-# Demonstrate Arvados federation features. This performs a parallel
-# scatter over some arbitrary number of files and federated clusters,
-# then joins the results.
-#
+# Demonstrate Arvados federation features. This example searches a
+# list of CSV files that are hosted on different Arvados clusters.
+# For each file, send a task to the remote cluster which will scan
+# the file and extract the rows where the column "select_column" has one
+# of the values appearing in the "select_values" file. The home
+# cluster then runs a task which pulls the results from the remote
+# clusters and merges the results to produce a final report.
+
cwlVersion: v1.0
class: Workflow
$namespaces:
dockerPull: arvados/jobs
# Define a record type so we can conveniently associate the input
- # file, the cluster on which the file lives, and the project on that
- # cluster that will own the container requests and intermediate
- # outputs.
+ # file and the cluster where the task should run.
SchemaDefRequirement:
types:
- - name: FileOnCluster
- type: record
- fields:
- file: File
- cluster: string
- project: string
+ - $import: FileOnCluster.yml
inputs:
- # Expect an array of FileOnCluster records (defined above)
- # as our input.
- shards:
+ select_column: string
+ select_values: File
+
+ datasets:
type:
type: array
- items: FileOnCluster
+ items: FileOnCluster.yml#FileOnCluster
+
+ intermediate_projects: string[]
outputs:
# Will produce an output file with the results of the distributed
- # analysis jobs joined together.
+ # analysis jobs merged together.
joined:
type: File
- outputSource: gather-results/joined
+ outputSource: gather-results/out
steps:
distributed-analysis:
in:
- # Take "shards" array as input, we scatter over it below.
- shard: shards
-
- # Use an expression to extract the "file" field to assign to the
- # "inp" parameter of the tool.
- inp: {valueFrom: $(inputs.shard.file)}
+ select_column: select_column
+ select_values: select_values
+ dataset: datasets
+ intermediate_projects: intermediate_projects
# Scatter over shards, this means creating a parallel job for each
# element in the "shards" array. Expressions are evaluated for
# each element.
- scatter: shard
+ scatter: [dataset, intermediate_projects]
+ scatterMethod: dotproduct
- # Specify the cluster target for this job. This means each
- # separate scatter job will execute on the cluster that was
+ # Specify the cluster target for this task. This means each
+ # separate scatter task will execute on the cluster that was
# specified in the "cluster" field.
#
# Arvados handles streaming data between clusters, for example,
# the federation.
hints:
arv:ClusterTarget:
- cluster_id: $(inputs.shard.cluster)
- project_uuid: $(inputs.shard.project)
+ cluster_id: $(inputs.dataset.cluster)
+ project_uuid: $(inputs.intermediate_projects)
out: [out]
- run: md5sum.cwl
+ run: extract.cwl
# Collect the results of the distributed step and join them into a
# single output file. Arvados handles streaming inputs,
# intermediate results, and outputs between clusters on demand.
gather-results:
in:
- inp: distributed-analysis/out
- out: [joined]
- run: cat.cwl
+ dataset: distributed-analysis/out
+ out: [out]
+ run: merge.cwl
+++ /dev/null
-file-on-clsr1.dat
+++ /dev/null
-file-on-clsr2.dat
+++ /dev/null
-file-on-clsr3.dat
--- /dev/null
+color,item
+blue,ball
+yellow,ball
+red,ball
+green,book
+purple,book
+red,book
+yellow,flower
+purple,flower
+red,bicycle
+red,ball
+green,picture
+yellow,ball
+purple,flower
+yellow,ball
+green,bicycle
+orange,book
+green,book
+orange,picture
+blue,book
+orange,car
+yellow,flower
+purple,ball
+blue,book
+orange,book
+orange,book
+yellow,book
+orange,car
+yellow,car
--- /dev/null
+color,item
+green,bicycle
+red,flower
+blue,bicycle
+yellow,flower
+green,ball
+red,book
+red,bicycle
+yellow,ball
+blue,picture
+green,book
+orange,flower
+blue,ball
+orange,car
+green,book
+yellow,car
+orange,picture
+orange,car
+yellow,flower
+green,ball
+orange,car
+purple,book
+green,ball
+red,flower
+blue,car
+orange,flower
+blue,book
+blue,bicycle
+red,picture
+orange,flower
+orange,book
+blue,flower
+orange,book
--- /dev/null
+color,item
+purple,book
+green,book
+red,bicycle
+yellow,book
+orange,book
+green,car
+green,car
+blue,ball
+yellow,bicycle
+orange,book
+green,bicycle
+blue,flower
+red,bicycle
+purple,bicycle
+green,bicycle
+orange,ball
+yellow,car
+orange,ball
+red,ball
+red,car
+green,picture
+green,flower
+blue,picture
+green,car
+yellow,flower
+purple,flower
+green,ball
+yellow,bicycle
+orange,bicycle
+orange,flower
+yellow,picture
+purple,flower
+green,picture
+orange,car
+orange,picture
+yellow,car
+yellow,picture
+purple,picture
+purple,picture
+purple,flower
+++ /dev/null
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-cwlVersion: v1.0
-class: CommandLineTool
-$namespaces:
- arv: "http://arvados.org/cwl#"
-requirements:
- InlineJavascriptRequirement: {}
-inputs:
- inp:
- type: File
-outputs:
- out:
- type: File
- outputBinding:
- glob: out.txt
-stdin: $(inputs.inp.path)
-stdout: out.txt
-arguments: ["md5sum", "-"]
--- /dev/null
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+ SchemaDefRequirement:
+ types:
+ - $import: FileOnCluster.yml
+inputs:
+ dataset:
+ type: File[]
+ inputBinding:
+ position: 1
+ merge_py:
+ type: File
+ default:
+ class: File
+ location: merge.py
+outputs:
+ out:
+ type: File
+ outputBinding:
+ glob: merged.csv
+
+arguments: [python, $(inputs.merge_py)]
--- /dev/null
+import sys
+import csv
+
+merged = open("merged.csv", "wt")
+
+wroteheader = False
+for s in sys.argv[1:]:
+ f = open(s, "rt")
+ header = next(f)
+ if not wroteheader:
+ merged.write(header)
+ wroteheader = True
+ for l in f:
+ merged.write(l)
+ f.close()
-shards:
+select_column: color
+select_values:
+ class: File
+ location: colors_to_select.txt
+
+datasets:
- cluster: clsr1
- project: clsr1-j7d0g-qxc4jcji7n4lafx
file:
class: File
- location: keep:485df2c5cec3207a32f49c42f1cdcca9+61/file-on-clsr1.dat
+ location: keep:0dcf9310e5bf0c07270416d3a0cd6a43+56/items1.csv
- cluster: clsr2
- project: clsr2-j7d0g-ivdrm1hyym21vkq
file:
class: File
- location: keep:ae6e9c3e9bfa52a0122ecb489d8198ff+61/file-on-clsr2.dat
+ location: keep:12707d325a3f4687674b858bd32beae9+56/items2.csv
- cluster: clsr3
- project: clsr3-j7d0g-e3njz2s53lyb0ka
file:
class: File
- location: keep:0b43a0ef9ea592d5d7b299978dfa8643+61/file-on-clsr3.dat
+ location: keep:dbff6bb7fc43176527af5eb9dec28871+56/items3.csv
+
+intermediate_projects:
+ - clsr1-j7d0g-qxc4jcji7n4lafx
+ - clsr2-j7d0g-e7r20egb8hlgn53
+ - clsr3-j7d0g-vrl00zoku9spnen
---
layout: default
navsection: userguide
-title: Welcome to Arvados™!
+title: Welcome to Arvados<sup>™</sup>!
...
{% comment %}
Copyright (C) The Arvados Authors. All rights reserved.
In order to reassemble the file, Keep stores a *collection* data block which lists in sequence the data blocks that make up the original file. A collection data block may store the information for multiple files, including a directory structure.
-In this example we will use @c1bad4b39ca5a924e481008009d94e32+210@, which we added to Keep in "how to upload data":{{ site.baseurl }}/user/tutorials/tutorial-keep.html. First let us examine the contents of this collection using @arv keep get@:
+In this example we will use @c1bad4b39ca5a924e481008009d94e32+210@, which we added to Keep in "how to upload data":{{ site.baseurl }}/user/tutorials/tutorial-keep.html. First let us examine the contents of this collection using @arv-get@:
<notextile>
-<pre><code>~$ <span class="userinput">arv keep get c1bad4b39ca5a924e481008009d94e32+210</span>
+<pre><code>~$ <span class="userinput">arv-get c1bad4b39ca5a924e481008009d94e32+210</span>
. 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
</code></pre>
</notextile>
-The command @arv keep get@ fetches the contents of the collection @c1bad4b39ca5a924e481008009d94e32+210@. In this example, this collection includes a single file @var-GS000016015-ASM.tsv.bz2@ which is 227212247 bytes long, and is stored using four sequential data blocks, @204e43b8a1185621ca55a94839582e6f+67108864@, @b9677abbac956bd3e86b1deb28dfac03+67108864@, @fc15aff2a762b13f521baf042140acec+67108864@, and @323d2a3ce20370c4ca1d3462a344f8fd+25885655@.
+The command @arv-get@ fetches the contents of the collection @c1bad4b39ca5a924e481008009d94e32+210@. In this example, this collection includes a single file @var-GS000016015-ASM.tsv.bz2@ which is 227212247 bytes long, and is stored using four sequential data blocks, @204e43b8a1185621ca55a94839582e6f+67108864@, @b9677abbac956bd3e86b1deb28dfac03+67108864@, @fc15aff2a762b13f521baf042140acec+67108864@, and @323d2a3ce20370c4ca1d3462a344f8fd+25885655@.
-Let's use @arv keep get@ to download the first data block:
+Let's use @arv-get@ to download the first data block:
notextile. <pre><code>~$ <span class="userinput">cd /scratch/<b>you</b></span>
-/scratch/<b>you</b>$ <span class="userinput">arv keep get 204e43b8a1185621ca55a94839582e6f+67108864 > block1</span></code></pre>
+/scratch/<b>you</b>$ <span class="userinput">arv-get 204e43b8a1185621ca55a94839582e6f+67108864 > block1</span></code></pre>
{% include 'notebox_begin' %}
notextile. <pre><code>WARNING:root:API lookup failed for collection 204e43b8a1185621ca55a94839582e6f+67108864 (<class 'apiclient.errors.HttpError'>: <HttpError 404 when requesting https://qr1hi.arvadosapi.com/arvados/v1/collections/204e43b8a1185621ca55a94839582e6f%2B67108864?alt=json returned "Not Found">)</code></pre>
-This happens because @arv keep get@ tries to find a collection with this identifier. When that fails, it emits this warning, then looks for a datablock instead, which succeeds.
+This happens because @arv-get@ tries to find a collection with this identifier. When that fails, it emits this warning, then looks for a datablock instead, which succeeds.
{% include 'notebox_end' %}
Download the GATK binary tarball[1] -- e.g., @GenomeAnalysisTK-2.6-4.tar.bz2@ -- and "copy it to your Arvados VM":{{site.baseurl}}/user/tutorials/tutorial-keep.html.
<notextile>
-<pre><code>~$ <span class="userinput">arv keep put GenomeAnalysisTK-2.6-4.tar.bz2</span>
+<pre><code>~$ <span class="userinput">arv-put GenomeAnalysisTK-2.6-4.tar.bz2</span>
c905c8d8443a9c44274d98b7c6cfaa32+94
</code></pre>
</notextile>
</code></pre>
</notextile>
-This collection consists of the @md5sum.txt@ file. Use @arv keep get@ to show the contents of the @md5sum.txt@ file:
+This collection consists of the @md5sum.txt@ file. Use @arv-get@ to show the contents of the @md5sum.txt@ file:
<notextile>
-<pre><code>~$ <span class="userinput">arv keep get dd755dbc8d49a67f4fe7dc843e4f10a6+54/md5sum.txt</span>
+<pre><code>~$ <span class="userinput">arv-get dd755dbc8d49a67f4fe7dc843e4f10a6+54/md5sum.txt</span>
44b8ae3fde7a8a88d2f7ebd237625b4f ./var-GS000016015-ASM.tsv.bz2
</code></pre>
</notextile>
</code></pre>
</notextile>
-The log collection consists of one log file named with the job's UUID. You can access it using @arv keep get@:
+The log collection consists of one log file named with the job's UUID. You can access it using @arv-get@:
<notextile>
-<pre><code>~$ <span class="userinput">arv keep get xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91/qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt</span>
+<pre><code>~$ <span class="userinput">arv-get xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91/qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt</span>
2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 check slurm allocation
2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 node compute13 - 8 slots
2013-12-16_20:44:36 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575 start
<notextile>
<pre><code>~/$USER/crunch_scripts$ <span class="userinput">arv keep ls e2ccd204bca37c77c0ba59fc470cd0f7+162</span>
./md5sum.txt
-~/$USER/crunch_scripts$ <span class="userinput">arv keep get e2ccd204bca37c77c0ba59fc470cd0f7+162/md5sum.txt</span>
+~/$USER/crunch_scripts$ <span class="userinput">arv-get e2ccd204bca37c77c0ba59fc470cd0f7+162/md5sum.txt</span>
0f1d6bcf55c34bed7f92a805d2d89bbf alice.txt
504938460ef369cd275e4ef58994cffe bob.txt
8f3b36aff310e06f3c5b9e95678ff77a carol.txt
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Arvados Data collections can be uploaded using either the @arv keep put@ command line tool or using Workbench.
+Arvados Data collections can be uploaded using either the @arv-put@ command line tool or using Workbench.
# "*Upload using command line tool*":#upload-using-command
# "*Upload using Workbench*":#upload-using-workbench
{% include 'tutorial_expectations' %}
-To upload a file to Keep using @arv keep put@:
+To upload a file to Keep using @arv-put@:
<notextile>
-<pre><code>~$ <span class="userinput">arv keep put var-GS000016015-ASM.tsv.bz2</span>
+<pre><code>~$ <span class="userinput">arv-put var-GS000016015-ASM.tsv.bz2</span>
216M / 216M 100.0%
Collection saved as ...
qr1hi-4zz18-xxxxxxxxxxxxxxx
Note: The file used in this example is a freely available TSV file containing variant annotations from the "Personal Genome Project (PGP)":http://www.pgp-hms.org participant "hu599905":https://my.pgp-hms.org/profile/hu599905), downloadable "here":https://warehouse.pgp-hms.org/warehouse/f815ec01d5d2f11cb12874ab2ed50daa+234+K@ant/var-GS000016015-ASM.tsv.bz2. Alternatively, you can replace @var-GS000016015-ASM.tsv.bz2@ with the name of any file you have locally, or you could get the TSV file by "downloading it from Keep.":{{site.baseurl}}/user/tutorials/tutorial-keep-get.html
-<notextile><a name="dir"></a></notextile>It is also possible to upload an entire directory with @arv keep put@:
+<notextile><a name="dir"></a></notextile>It is also possible to upload an entire directory with @arv-put@:
<notextile>
<pre><code>~$ <span class="userinput">mkdir tmp</span>
~$ <span class="userinput">echo "hello alice" > tmp/alice.txt</span>
~$ <span class="userinput">echo "hello bob" > tmp/bob.txt</span>
~$ <span class="userinput">echo "hello carol" > tmp/carol.txt</span>
-~$ <span class="userinput">arv keep put tmp</span>
+~$ <span class="userinput">arv-put tmp</span>
0M / 0M 100.0%
Collection saved as ...
qr1hi-4zz18-yyyyyyyyyyyyyyy
</code></pre>
</notextile>
-In both examples, the @arv keep put@ command created a collection. The first collection contains the single uploaded file. The second collection contains the entire uploaded directory.
+In both examples, the @arv-put@ command created a collection. The first collection contains the single uploaded file. The second collection contains the entire uploaded directory.
-@arv keep put@ accepts quite a few optional command line arguments, which are described on the "arv subcommands":{{site.baseurl}}/sdk/cli/subcommands.html#arv-keep-put page.
+@arv-put@ accepts quite a few optional command line arguments, which are described on the "arv subcommands":{{site.baseurl}}/sdk/cli/subcommands.html#arv-keep-put page.
h3. Locate your collection in Workbench
-Visit the Workbench *Dashboard*. Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project. Your newly uploaded collection should appear near the top of the *Data collections* tab. The collection name printed by @arv keep put@ will appear under the *name* column.
+Visit the Workbench *Dashboard*. Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project. Your newly uploaded collection should appear near the top of the *Data collections* tab. The collection name printed by @arv-put@ will appear under the *name* column.
To move the collection to a different project, check the box at the left of the collection row. Pull down the *Selection...*<span class="caret"></span> menu near the top of the page tab, and select *Move selected...* button. This will open a dialog box where you can select a destination project for the collection. Click a project, then finally the <span class="btn btn-sm btn-primary">Move</span> button.
import (
"bytes"
+ "flag"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
- "git.curoverse.com/arvados.git/lib/cmd"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/ctxlog"
"github.com/ghodss/yaml"
)
-var DumpCommand cmd.Handler = dumpCommand{}
+var DumpCommand dumpCommand
type dumpCommand struct{}
fmt.Fprintf(stderr, "%s\n", err)
}
}()
- if len(args) != 0 {
- err = fmt.Errorf("usage: %s <config-src.yaml >config-min.yaml", prog)
+
+ flags := flag.NewFlagSet("", flag.ContinueOnError)
+ flags.SetOutput(stderr)
+ configFile := flags.String("config", arvados.DefaultConfigFile, "Site configuration `file`")
+ err = flags.Parse(args)
+ if err == flag.ErrHelp {
+ err = nil
+ return 0
+ } else if err != nil {
+ return 2
+ }
+
+ if len(flags.Args()) != 0 {
+ flags.Usage()
return 2
}
log := ctxlog.New(stderr, "text", "info")
- cfg, err := Load(stdin, log)
+ cfg, err := loadFileOrStdin(*configFile, stdin, log)
if err != nil {
return 1
}
return 0
}
-var CheckCommand cmd.Handler = checkCommand{}
+var CheckCommand checkCommand
type checkCommand struct{}
fmt.Fprintf(stderr, "%s\n", err)
}
}()
- if len(args) != 0 {
- err = fmt.Errorf("usage: %s <config-src.yaml && echo 'no changes needed'", prog)
+
+ flags := flag.NewFlagSet("", flag.ContinueOnError)
+ flags.SetOutput(stderr)
+ configFile := flags.String("config", arvados.DefaultConfigFile, "Site configuration `file`")
+ err = flags.Parse(args)
+ if err == flag.ErrHelp {
+ err = nil
+ return 0
+ } else if err != nil {
+ return 2
+ }
+
+ if len(flags.Args()) != 0 {
+ flags.Usage()
return 2
}
log := &plainLogger{w: stderr}
- buf, err := ioutil.ReadAll(stdin)
+ var buf []byte
+ if *configFile == "-" {
+ buf, err = ioutil.ReadAll(stdin)
+ } else {
+ buf, err = ioutil.ReadFile(*configFile)
+ }
if err != nil {
return 1
}
import (
"bytes"
+ "git.curoverse.com/arvados.git/lib/cmd"
check "gopkg.in/check.v1"
)
var _ = check.Suite(&CommandSuite{})
+var (
+ // Commands must satisfy cmd.Handler interface
+ _ cmd.Handler = dumpCommand{}
+ _ cmd.Handler = checkCommand{}
+)
+
type CommandSuite struct{}
func (s *CommandSuite) TestBadArg(c *check.C) {
var stderr bytes.Buffer
code := DumpCommand.RunCommand("arvados config-dump", []string{"-badarg"}, bytes.NewBuffer(nil), bytes.NewBuffer(nil), &stderr)
c.Check(code, check.Equals, 2)
- c.Check(stderr.String(), check.Matches, `(?ms)usage: .*`)
+ c.Check(stderr.String(), check.Matches, `(?ms)flag provided but not defined: -badarg\nUsage:\n.*`)
}
func (s *CommandSuite) TestEmptyInput(c *check.C) {
var stdout, stderr bytes.Buffer
- code := DumpCommand.RunCommand("arvados config-dump", nil, &bytes.Buffer{}, &stdout, &stderr)
+ code := DumpCommand.RunCommand("arvados config-dump", []string{"-config", "-"}, &bytes.Buffer{}, &stdout, &stderr)
c.Check(code, check.Equals, 1)
c.Check(stderr.String(), check.Matches, `config does not define any clusters\n`)
}
API:
MaxItemsPerResponse: 1234
`
- code := CheckCommand.RunCommand("arvados config-check", nil, bytes.NewBufferString(in), &stdout, &stderr)
+ code := CheckCommand.RunCommand("arvados config-check", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
c.Check(code, check.Equals, 0)
c.Check(stdout.String(), check.Equals, "")
c.Check(stderr.String(), check.Equals, "")
RequestLimits:
MaxItemsPerResponse: 1234
`
- code := CheckCommand.RunCommand("arvados config-check", nil, bytes.NewBufferString(in), &stdout, &stderr)
+ code := CheckCommand.RunCommand("arvados config-check", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
c.Check(code, check.Equals, 1)
- c.Check(stdout.String(), check.Matches, `(?ms).*API:\n\- +.*MaxItemsPerResponse: 1000\n\+ +MaxItemsPerResponse: 1234\n.*`)
+ c.Check(stdout.String(), check.Matches, `(?ms).*\n\- +.*MaxItemsPerResponse: 1000\n\+ +MaxItemsPerResponse: 1234\n.*`)
}
func (s *CommandSuite) TestCheckUnknownKey(c *check.C) {
ConnectionPool:
{Bogus5: true}
`
- code := CheckCommand.RunCommand("arvados config-check", nil, bytes.NewBufferString(in), &stdout, &stderr)
+ code := CheckCommand.RunCommand("arvados config-check", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
c.Log(stderr.String())
c.Check(code, check.Equals, 1)
c.Check(stderr.String(), check.Matches, `(?ms).*deprecated or unknown config entry: Clusters.z1234.Bogus1\n.*`)
InternalURLs:
http://localhost:12345: {}
`
- code := DumpCommand.RunCommand("arvados config-dump", nil, bytes.NewBufferString(in), &stdout, &stderr)
+ code := DumpCommand.RunCommand("arvados config-dump", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
c.Check(code, check.Equals, 0)
c.Check(stdout.String(), check.Matches, `(?ms).*TimeoutBooting: 10m\n.*`)
c.Check(stdout.String(), check.Matches, `(?ms).*http://localhost:12345: {}\n.*`)
UnknownKey: foobar
ManagementToken: secret
`
- code := DumpCommand.RunCommand("arvados config-dump", nil, bytes.NewBufferString(in), &stdout, &stderr)
+ code := DumpCommand.RunCommand("arvados config-dump", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
c.Check(code, check.Equals, 0)
c.Check(stderr.String(), check.Matches, `(?ms).*deprecated or unknown config entry: Clusters.z1234.UnknownKey.*`)
c.Check(stdout.String(), check.Matches, `(?ms)Clusters:\n z1234:\n.*`)
InternalURLs: {}
ExternalURL: ""
GitSSH:
+ InternalURLs: {}
ExternalURL: ""
DispatchCloud:
InternalURLs: {}
ExternalURL: "-"
SSO:
+ InternalURLs: {}
ExternalURL: ""
Keepproxy:
InternalURLs: {}
InternalURLs: {}
ExternalURL: "-"
Composer:
+ InternalURLs: {}
ExternalURL: ""
WebShell:
+ InternalURLs: {}
ExternalURL: ""
Workbench1:
InternalURLs: {}
ExternalURL: ""
Workbench2:
+ InternalURLs: {}
ExternalURL: ""
Nodemanager:
InternalURLs: {}
# Interval (seconds) between asynchronous permission view updates. Any
# permission-updating API called with the 'async' parameter schedules an
# update on the permission view in the future, if not already scheduled.
- AsyncPermissionsUpdateInterval: 20
+ AsyncPermissionsUpdateInterval: 20s
# Maximum number of concurrent outgoing requests to make while
# serving a single incoming multi-cluster (federated) request.
# Interval (seconds) between trash sweeps. During a trash sweep,
# collections are marked as trash if their trash_at time has
# arrived, and deleted if their delete_at time has arrived.
- TrashSweepInterval: 60
+ TrashSweepInterval: 60s
# If true, enable collection versioning.
# When a collection's preserve_version field is true or the current version
# the current collection.
CollectionVersioning: false
- # 0 = auto-create a new version on every update.
- # -1 = never auto-create new versions.
- # > 0 = auto-create a new version when older than the specified number of seconds.
- PreserveVersionIfIdle: -1
+ # 0s = auto-create a new version on every update.
+ # -1s = never auto-create new versions.
+ # > 0s = auto-create a new version when older than the specified amount of time.
+ PreserveVersionIfIdle: -1s
# Managed collection properties. At creation time, if the client didn't
# provide the listed keys, they will be automatically populated following
# scheduling parameter set.
UsePreemptibleInstances: false
- # Include details about job reuse decisions in the server log. This
- # causes additional database queries to run, so it should not be
- # enabled unless you expect to examine the resulting logs for
- # troubleshooting purposes.
- LogReuseDecisions: false
-
# PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
# (experimental) cloud dispatcher for executing containers on
# worker VMs. Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
LogBytesPerEvent: 4096
LogSecondsBetweenEvents: 1
- # The sample period for throttling logs, in seconds.
- LogThrottlePeriod: 60
+ # The sample period for throttling logs.
+ LogThrottlePeriod: 60s
# Maximum number of bytes that job can log over crunch_log_throttle_period
# before being silenced until the end of the period.
# silenced by throttling are not counted against this total.
LimitLogBytesPerJob: 67108864
- LogPartialLineThrottlePeriod: 5
+ LogPartialLineThrottlePeriod: 5s
- # Container logs are written to Keep and saved in a collection,
- # which is updated periodically while the container runs. This
- # value sets the interval (given in seconds) between collection
- # updates.
- LogUpdatePeriod: 1800
+ # Container logs are written to Keep and saved in a
+ # collection, which is updated periodically while the
+ # container runs. This value sets the interval between
+ # collection updates.
+ LogUpdatePeriod: 30m
# The log collection is also updated when the specified amount of
# log data (given in bytes) is produced in less than one update
# period.
- LogUpdateSize: 33554432
+ LogUpdateSize: 32MiB
SLURM:
Managed:
TimeoutShutdown: 10s
# Worker VM image ID.
- ImageID: ami-01234567890abcdef
+ ImageID: ""
# Tags to add on all resources (VMs, NICs, disks) created by
# the container dispatcher. (Arvados's own tags --
Insecure: false
ActivateUsers: false
SAMPLE:
+ # API endpoint host or host:port; default is {id}.arvadosapi.com
Host: sample.arvadosapi.com
+
+ # Perform a proxy request when a local client requests an
+ # object belonging to this remote.
Proxy: false
+
+ # Default "https". Can be set to "http" for testing.
Scheme: https
+
+ # Disable TLS verify. Can be set to true for testing.
Insecure: false
+
+ # When users present tokens issued by this remote cluster, and
+ # their accounts are active on the remote cluster, activate
+ # them on this cluster too.
ActivateUsers: false
+
+ Workbench:
+ # Workbench1 configs
+ Theme: default
+ ActivationContactLink: mailto:info@arvados.org
+ ArvadosDocsite: https://doc.arvados.org
+ ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
+ ShowUserAgreementInline: false
+ SecretToken: ""
+ SecretKeyBase: ""
+ RepositoryCache: /var/www/arvados-workbench/current/tmp/git
+ UserProfileFormFields:
+ SAMPLE:
+ Type: text
+ FormFieldTitle: ""
+ FormFieldDescription: ""
+ Required: true
+ UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'
+ ApplicationMimetypesWithViewIcon:
+ cwl: {}
+ fasta: {}
+ go: {}
+ javascript: {}
+ json: {}
+ pdf: {}
+ python: {}
+ x-python: {}
+ r: {}
+ rtf: {}
+ sam: {}
+ x-sh: {}
+ vnd.realvnc.bed: {}
+ xml: {}
+ xsl: {}
+ LogViewerMaxBytes: 1M
+ EnablePublicProjectsPage: true
+ EnableGettingStartedPopup: false
+ APIResponseCompression: true
+ APIClientConnectTimeout: 2m
+ APIClientReceiveTimeout: 5m
+ RunningJobLogRecordsToFetch: 2000
+ ShowRecentCollectionsOnDashboard: true
+ ShowUserNotifications: true
+ MultiSiteSearch: false
+ Repositories: true
+ SiteName: Arvados Workbench
+
+ # Workbench2 configs
+ VocabularyURL: ""
+ FileViewersConfigURL: ""
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package config
+
+import (
+ "encoding/json"
+ "errors"
+ "fmt"
+ "io"
+ "strings"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// ExportJSON writes a JSON object with the safe (non-secret) portions
+// of the cluster config to w.
+func ExportJSON(w io.Writer, cluster *arvados.Cluster) error {
+ buf, err := json.Marshal(cluster)
+ if err != nil {
+ return err
+ }
+ var m map[string]interface{}
+ err = json.Unmarshal(buf, &m)
+ if err != nil {
+ return err
+ }
+ err = redactUnsafe(m, "", "")
+ if err != nil {
+ return err
+ }
+ return json.NewEncoder(w).Encode(m)
+}
+
+// whitelist classifies configs as safe/unsafe to reveal to
+// unauthenticated clients.
+//
+// Every config entry must either be listed explicitly here along with
+// all of its parent keys (e.g., "API" + "API.RequestTimeout"), or
+// have an ancestor listed as false (e.g.,
+// "PostgreSQL.Connection.password" has an ancestor
+// "PostgreSQL.Connection" with a false value). Otherwise, it is a bug
+// which should be caught by tests.
+//
+// Example: API.RequestTimeout is safe because whitelist["API"] == true and
+// whitelist["API.RequestTimeout"] == true.
+//
+// Example: PostgreSQL.Connection.password is not safe because
+// whitelist["PostgreSQL.Connection"] == false.
+//
+// Example: PostgreSQL.BadKey would cause an error because
+// whitelist["PostgreSQL"] isn't false, and neither
+// whitelist["PostgreSQL.BadKey"] nor whitelist["PostgreSQL.*"]
+// exists.
+var whitelist = map[string]bool{
+ // | sort -t'"' -k2,2
+ "API": true,
+ "API.AsyncPermissionsUpdateInterval": false,
+ "API.DisabledAPIs": false,
+ "API.MaxIndexDatabaseRead": false,
+ "API.MaxItemsPerResponse": true,
+ "API.MaxRequestAmplification": false,
+ "API.MaxRequestSize": true,
+ "API.RailsSessionSecretToken": false,
+ "API.RequestTimeout": true,
+ "AuditLogs": false,
+ "AuditLogs.MaxAge": false,
+ "AuditLogs.MaxDeleteBatch": false,
+ "AuditLogs.UnloggedAttributes": false,
+ "Collections": true,
+ "Collections.BlobSigning": true,
+ "Collections.BlobSigningKey": false,
+ "Collections.BlobSigningTTL": true,
+ "Collections.CollectionVersioning": false,
+ "Collections.DefaultReplication": true,
+ "Collections.DefaultTrashLifetime": true,
+ "Collections.PreserveVersionIfIdle": true,
+ "Collections.TrashSweepInterval": false,
+ "Containers": true,
+ "Containers.CloudVMs": false,
+ "Containers.DefaultKeepCacheRAM": true,
+ "Containers.DispatchPrivateKey": false,
+ "Containers.JobsAPI": true,
+ "Containers.JobsAPI.CrunchJobUser": false,
+ "Containers.JobsAPI.CrunchJobWrapper": false,
+ "Containers.JobsAPI.CrunchRefreshTrigger": false,
+ "Containers.JobsAPI.DefaultDockerImage": false,
+ "Containers.JobsAPI.Enable": true,
+ "Containers.JobsAPI.GitInternalDir": false,
+ "Containers.JobsAPI.ReuseJobIfOutputsDiffer": false,
+ "Containers.Logging": false,
+ "Containers.LogReuseDecisions": false,
+ "Containers.MaxComputeVMs": false,
+ "Containers.MaxDispatchAttempts": false,
+ "Containers.MaxRetryAttempts": true,
+ "Containers.SLURM": false,
+ "Containers.StaleLockTimeout": false,
+ "Containers.SupportedDockerImageFormats": true,
+ "Containers.UsePreemptibleInstances": true,
+ "Git": false,
+ "InstanceTypes": true,
+ "InstanceTypes.*": true,
+ "InstanceTypes.*.*": true,
+ "Login": false,
+ "Mail": false,
+ "ManagementToken": false,
+ "PostgreSQL": false,
+ "RemoteClusters": true,
+ "RemoteClusters.*": true,
+ "RemoteClusters.*.ActivateUsers": true,
+ "RemoteClusters.*.Host": true,
+ "RemoteClusters.*.Insecure": true,
+ "RemoteClusters.*.Proxy": true,
+ "RemoteClusters.*.Scheme": true,
+ "Services": true,
+ "Services.*": true,
+ "Services.*.ExternalURL": true,
+ "Services.*.InternalURLs": false,
+ "SystemLogs": false,
+ "SystemRootToken": false,
+ "TLS": false,
+ "Users": false,
+ "Workbench": false,
+}
+
+func redactUnsafe(m map[string]interface{}, mPrefix, lookupPrefix string) error {
+ var errs []string
+ for k, v := range m {
+ lookupKey := k
+ safe, ok := whitelist[lookupPrefix+k]
+ if !ok {
+ lookupKey = "*"
+ safe, ok = whitelist[lookupPrefix+"*"]
+ }
+ if !ok {
+ errs = append(errs, fmt.Sprintf("config bug: key %q not in whitelist map", lookupPrefix+k))
+ continue
+ }
+ if !safe {
+ delete(m, k)
+ continue
+ }
+ if v, ok := v.(map[string]interface{}); ok {
+ err := redactUnsafe(v, mPrefix+k+".", lookupPrefix+lookupKey+".")
+ if err != nil {
+ errs = append(errs, err.Error())
+ }
+ }
+ }
+ if len(errs) > 0 {
+ return errors.New(strings.Join(errs, "\n"))
+ }
+ return nil
+}
--- /dev/null
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package config
+
+import (
+ "bytes"
+ "regexp"
+ "strings"
+
+ "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&ExportSuite{})
+
+type ExportSuite struct{}
+
+func (s *ExportSuite) TestExport(c *check.C) {
+ confdata := bytes.Replace(DefaultYAML, []byte("SAMPLE"), []byte("testkey"), -1)
+ cfg, err := Load(bytes.NewBuffer(confdata), ctxlog.TestLogger(c))
+ c.Assert(err, check.IsNil)
+ cluster := cfg.Clusters["xxxxx"]
+ cluster.ManagementToken = "abcdefg"
+
+ var exported bytes.Buffer
+ err = ExportJSON(&exported, &cluster)
+ c.Check(err, check.IsNil)
+ if err != nil {
+ c.Logf("If all the new keys are safe, add these to whitelist in export.go:")
+ for _, k := range regexp.MustCompile(`"[^"]*"`).FindAllString(err.Error(), -1) {
+ c.Logf("\t%q: true,", strings.Replace(k, `"`, "", -1))
+ }
+ }
+ c.Check(exported.String(), check.Not(check.Matches), `(?ms).*abcdefg.*`)
+}
InternalURLs: {}
ExternalURL: ""
GitSSH:
+ InternalURLs: {}
ExternalURL: ""
DispatchCloud:
InternalURLs: {}
ExternalURL: "-"
SSO:
+ InternalURLs: {}
ExternalURL: ""
Keepproxy:
InternalURLs: {}
InternalURLs: {}
ExternalURL: "-"
Composer:
+ InternalURLs: {}
ExternalURL: ""
WebShell:
+ InternalURLs: {}
ExternalURL: ""
Workbench1:
InternalURLs: {}
ExternalURL: ""
Workbench2:
+ InternalURLs: {}
ExternalURL: ""
Nodemanager:
InternalURLs: {}
# Interval (seconds) between asynchronous permission view updates. Any
# permission-updating API called with the 'async' parameter schedules an
# update on the permission view in the future, if not already scheduled.
- AsyncPermissionsUpdateInterval: 20
+ AsyncPermissionsUpdateInterval: 20s
# Maximum number of concurrent outgoing requests to make while
# serving a single incoming multi-cluster (federated) request.
# Interval (seconds) between trash sweeps. During a trash sweep,
# collections are marked as trash if their trash_at time has
# arrived, and deleted if their delete_at time has arrived.
- TrashSweepInterval: 60
+ TrashSweepInterval: 60s
# If true, enable collection versioning.
# When a collection's preserve_version field is true or the current version
# the current collection.
CollectionVersioning: false
- # 0 = auto-create a new version on every update.
- # -1 = never auto-create new versions.
- # > 0 = auto-create a new version when older than the specified number of seconds.
- PreserveVersionIfIdle: -1
+ # 0s = auto-create a new version on every update.
+ # -1s = never auto-create new versions.
+ # > 0s = auto-create a new version when older than the specified amount of time.
+ PreserveVersionIfIdle: -1s
# Managed collection properties. At creation time, if the client didn't
# provide the listed keys, they will be automatically populated following
# scheduling parameter set.
UsePreemptibleInstances: false
- # Include details about job reuse decisions in the server log. This
- # causes additional database queries to run, so it should not be
- # enabled unless you expect to examine the resulting logs for
- # troubleshooting purposes.
- LogReuseDecisions: false
-
# PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
# (experimental) cloud dispatcher for executing containers on
# worker VMs. Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
LogBytesPerEvent: 4096
LogSecondsBetweenEvents: 1
- # The sample period for throttling logs, in seconds.
- LogThrottlePeriod: 60
+ # The sample period for throttling logs.
+ LogThrottlePeriod: 60s
# Maximum number of bytes that job can log over crunch_log_throttle_period
# before being silenced until the end of the period.
# silenced by throttling are not counted against this total.
LimitLogBytesPerJob: 67108864
- LogPartialLineThrottlePeriod: 5
+ LogPartialLineThrottlePeriod: 5s
- # Container logs are written to Keep and saved in a collection,
- # which is updated periodically while the container runs. This
- # value sets the interval (given in seconds) between collection
- # updates.
- LogUpdatePeriod: 1800
+ # Container logs are written to Keep and saved in a
+ # collection, which is updated periodically while the
+ # container runs. This value sets the interval between
+ # collection updates.
+ LogUpdatePeriod: 30m
# The log collection is also updated when the specified amount of
# log data (given in bytes) is produced in less than one update
# period.
- LogUpdateSize: 33554432
+ LogUpdateSize: 32MiB
SLURM:
Managed:
TimeoutShutdown: 10s
# Worker VM image ID.
- ImageID: ami-01234567890abcdef
+ ImageID: ""
# Tags to add on all resources (VMs, NICs, disks) created by
# the container dispatcher. (Arvados's own tags --
Insecure: false
ActivateUsers: false
SAMPLE:
+ # API endpoint host or host:port; default is {id}.arvadosapi.com
Host: sample.arvadosapi.com
+
+ # Perform a proxy request when a local client requests an
+ # object belonging to this remote.
Proxy: false
+
+ # Default "https". Can be set to "http" for testing.
Scheme: https
+
+ # Disable TLS verify. Can be set to true for testing.
Insecure: false
+
+ # When users present tokens issued by this remote cluster, and
+ # their accounts are active on the remote cluster, activate
+ # them on this cluster too.
ActivateUsers: false
+
+ Workbench:
+ # Workbench1 configs
+ Theme: default
+ ActivationContactLink: mailto:info@arvados.org
+ ArvadosDocsite: https://doc.arvados.org
+ ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
+ ShowUserAgreementInline: false
+ SecretToken: ""
+ SecretKeyBase: ""
+ RepositoryCache: /var/www/arvados-workbench/current/tmp/git
+ UserProfileFormFields:
+ SAMPLE:
+ Type: text
+ FormFieldTitle: ""
+ FormFieldDescription: ""
+ Required: true
+ UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'
+ ApplicationMimetypesWithViewIcon:
+ cwl: {}
+ fasta: {}
+ go: {}
+ javascript: {}
+ json: {}
+ pdf: {}
+ python: {}
+ x-python: {}
+ r: {}
+ rtf: {}
+ sam: {}
+ x-sh: {}
+ vnd.realvnc.bed: {}
+ xml: {}
+ xsl: {}
+ LogViewerMaxBytes: 1M
+ EnablePublicProjectsPage: true
+ EnableGettingStartedPopup: false
+ APIResponseCompression: true
+ APIClientConnectTimeout: 2m
+ APIClientReceiveTimeout: 5m
+ RunningJobLogRecordsToFetch: 2000
+ ShowRecentCollectionsOnDashboard: true
+ ShowUserNotifications: true
+ MultiSiteSearch: false
+ Repositories: true
+ SiteName: Arvados Workbench
+
+ # Workbench2 configs
+ VocabularyURL: ""
+ FileViewersConfigURL: ""
`)
Warnf(string, ...interface{})
}
+func loadFileOrStdin(path string, stdin io.Reader, log logger) (*arvados.Config, error) {
+ if path == "-" {
+ return load(stdin, log, true)
+ } else {
+ return LoadFile(path, log)
+ }
+}
+
func LoadFile(path string, log logger) (*arvados.Config, error) {
f, err := os.Open(path)
if err != nil {
c.Check(logs, check.HasLen, 2)
}
+func (s *LoadSuite) TestNoUnrecognizedKeysInDefaultConfig(c *check.C) {
+ var logbuf bytes.Buffer
+ logger := logrus.New()
+ logger.Out = &logbuf
+ var supplied map[string]interface{}
+ yaml.Unmarshal(DefaultYAML, &supplied)
+ cfg, err := Load(bytes.NewBuffer(DefaultYAML), logger)
+ c.Assert(err, check.IsNil)
+ var loaded map[string]interface{}
+ buf, err := yaml.Marshal(cfg)
+ c.Assert(err, check.IsNil)
+ err = yaml.Unmarshal(buf, &loaded)
+ c.Assert(err, check.IsNil)
+
+ logExtraKeys(logger, loaded, supplied, "")
+ c.Check(logbuf.String(), check.Equals, "")
+}
+
func (s *LoadSuite) TestNoWarningsForDumpedConfig(c *check.C) {
var logbuf bytes.Buffer
logger := logrus.New()
cluster := &arvados.Cluster{
ClusterID: "zhome",
PostgreSQL: integrationTestCluster().PostgreSQL,
- TLS: arvados.TLS{Insecure: true},
- API: arvados.API{
- MaxItemsPerResponse: 1000,
- MaxRequestAmplification: 4,
- },
}
+ cluster.TLS.Insecure = true
+ cluster.API.MaxItemsPerResponse = 1000
+ cluster.API.MaxRequestAmplification = 4
arvadostest.SetServiceURL(&cluster.Services.RailsAPI, "http://localhost:1/")
arvadostest.SetServiceURL(&cluster.Services.Controller, "http://localhost:/")
s.testHandler = &Handler{Cluster: cluster}
package controller
import (
+ "bytes"
"context"
"database/sql"
"errors"
"fmt"
+ "io"
"net/http"
"net/url"
"strings"
"sync"
"time"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/health"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
Prefix: "/_health/",
Routes: health.Routes{"ping": func() error { _, err := h.db(&http.Request{}); return err }},
})
+
+ mux.Handle("/arvados/v1/config", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+ var buf bytes.Buffer
+ err := config.ExportJSON(&buf, h.Cluster)
+ if err != nil {
+ httpserver.Error(w, err.Error(), http.StatusInternalServerError)
+ return
+ }
+ w.Header().Set("Content-Type", "application/json")
+ io.Copy(w, &buf)
+ }))
+
hs := http.NotFoundHandler()
hs = prepend(hs, h.proxyRailsAPI)
hs = h.setupProxyRemoteCluster(hs)
s.cluster = &arvados.Cluster{
ClusterID: "zzzzz",
PostgreSQL: integrationTestCluster().PostgreSQL,
- TLS: arvados.TLS{Insecure: true},
}
+ s.cluster.TLS.Insecure = true
arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
arvadostest.SetServiceURL(&s.cluster.Services.Controller, "http://localhost:/")
s.handler = newHandler(s.ctx, s.cluster, "")
s.cancel()
}
+func (s *HandlerSuite) TestConfigExport(c *check.C) {
+ s.cluster.ManagementToken = "secret"
+ s.cluster.SystemRootToken = "secret"
+ s.cluster.Collections.BlobSigning = true
+ s.cluster.Collections.BlobSigningTTL = arvados.Duration(23 * time.Second)
+ req := httptest.NewRequest("GET", "/arvados/v1/config", nil)
+ resp := httptest.NewRecorder()
+ s.handler.ServeHTTP(resp, req)
+ c.Check(resp.Code, check.Equals, http.StatusOK)
+ var cluster arvados.Cluster
+ c.Log(resp.Body.String())
+ err := json.Unmarshal(resp.Body.Bytes(), &cluster)
+ c.Check(err, check.IsNil)
+ c.Check(cluster.ManagementToken, check.Equals, "")
+ c.Check(cluster.SystemRootToken, check.Equals, "")
+ c.Check(cluster.Collections.BlobSigning, check.DeepEquals, true)
+ c.Check(cluster.Collections.BlobSigningTTL, check.Equals, arvados.Duration(23*time.Second))
+}
+
func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
req := httptest.NewRequest("GET", "/discovery/v1/apis/arvados/v1/rest", nil)
resp := httptest.NewRecorder()
handler := &Handler{Cluster: &arvados.Cluster{
ClusterID: "zzzzz",
PostgreSQL: integrationTestCluster().PostgreSQL,
- TLS: arvados.TLS{Insecure: true},
}}
+ handler.Cluster.TLS.Insecure = true
arvadostest.SetServiceURL(&handler.Cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
arvadostest.SetServiceURL(&handler.Cluster.Services.Controller, "http://localhost:/")
disp.sshKey = key
}
- instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID, disp.logger)
+ disp.reg = prometheus.NewRegistry()
+ instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID, disp.logger, disp.reg)
if err != nil {
disp.logger.Fatalf("error initializing driver: %s", err)
}
disp.instanceSet = instanceSet
- disp.reg = prometheus.NewRegistry()
disp.pool = worker.NewPool(disp.logger, disp.ArvClient, disp.reg, disp.InstanceSetID, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, disp.ArvClient)
}
s.cluster = &arvados.Cluster{
+ ManagementToken: "test-management-token",
Containers: arvados.ContainersConfig{
DispatchPrivateKey: string(dispatchprivraw),
StaleLockTimeout: arvados.Duration(5 * time.Millisecond),
c.Fatalf("timed out with %d containers (%v), %d instances (%+v)", len(ents), ents, len(insts), insts)
}
}
+
+ req := httptest.NewRequest("GET", "/metrics", nil)
+ req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken)
+ resp := httptest.NewRecorder()
+ s.disp.ServeHTTP(resp, req)
+ c.Check(resp.Code, check.Equals, http.StatusOK)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="0",operation="Create"} [^0].*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="0",operation="List"} [^0].*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="0",operation="Destroy"} [^0].*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="1",operation="Create"} [^0].*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="1",operation="List"} 0\n.*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*instances_disappeared{state="shutdown"} [^0].*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*instances_disappeared{state="unknown"} 0\n.*`)
}
func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
"git.curoverse.com/arvados.git/lib/cloud/azure"
"git.curoverse.com/arvados.git/lib/cloud/ec2"
"git.curoverse.com/arvados.git/sdk/go/arvados"
+ "github.com/prometheus/client_golang/prometheus"
"github.com/sirupsen/logrus"
"golang.org/x/crypto/ssh"
)
"ec2": ec2.Driver,
}
-func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
+func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) {
driver, ok := drivers[cluster.Containers.CloudVMs.Driver]
if !ok {
return nil, fmt.Errorf("unsupported cloud driver %q", cluster.Containers.CloudVMs.Driver)
}
sharedResourceTags := cloud.SharedResourceTags(cluster.Containers.CloudVMs.ResourceTags)
is, err := driver.InstanceSet(cluster.Containers.CloudVMs.DriverParameters, setID, sharedResourceTags, logger)
+ is = newInstrumentedInstanceSet(is, reg)
if maxops := cluster.Containers.CloudVMs.MaxCloudOpsPerSecond; maxops > 0 {
is = rateLimitedInstanceSet{
InstanceSet: is,
}).WithError(err).Debugf("filteringInstanceSet returning instances")
return returning, err
}
+
+func newInstrumentedInstanceSet(is cloud.InstanceSet, reg *prometheus.Registry) cloud.InstanceSet {
+ cv := prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "driver_operations",
+ Help: "Number of instance-create/destroy/list operations performed via cloud driver.",
+ }, []string{"operation", "error"})
+
+ // Create all counters, so they are reported with zero values
+ // (instead of being missing) until they are incremented.
+ for _, op := range []string{"Create", "List", "Destroy", "SetTags"} {
+ for _, error := range []string{"0", "1"} {
+ cv.WithLabelValues(op, error).Add(0)
+ }
+ }
+
+ reg.MustRegister(cv)
+ return instrumentedInstanceSet{is, cv}
+}
+
+type instrumentedInstanceSet struct {
+ cloud.InstanceSet
+ cv *prometheus.CounterVec
+}
+
+func (is instrumentedInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, init cloud.InitCommand, pk ssh.PublicKey) (cloud.Instance, error) {
+ inst, err := is.InstanceSet.Create(it, image, tags, init, pk)
+ is.cv.WithLabelValues("Create", boolLabelValue(err != nil)).Inc()
+ return instrumentedInstance{inst, is.cv}, err
+}
+
+func (is instrumentedInstanceSet) Instances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
+ instances, err := is.InstanceSet.Instances(tags)
+ is.cv.WithLabelValues("List", boolLabelValue(err != nil)).Inc()
+ var instrumented []cloud.Instance
+ for _, i := range instances {
+ instrumented = append(instrumented, instrumentedInstance{i, is.cv})
+ }
+ return instrumented, err
+}
+
+type instrumentedInstance struct {
+ cloud.Instance
+ cv *prometheus.CounterVec
+}
+
+func (inst instrumentedInstance) Destroy() error {
+ err := inst.Instance.Destroy()
+ inst.cv.WithLabelValues("Destroy", boolLabelValue(err != nil)).Inc()
+ return err
+}
+
+func (inst instrumentedInstance) SetTags(tags cloud.InstanceTags) error {
+ err := inst.Instance.SetTags(tags)
+ inst.cv.WithLabelValues("SetTags", boolLabelValue(err != nil)).Inc()
+ return err
+}
+
+func boolLabelValue(v bool) string {
+ if v {
+ return "1"
+ } else {
+ return "0"
+ }
+}
mInstancesPrice *prometheus.GaugeVec
mVCPUs *prometheus.GaugeVec
mMemory *prometheus.GaugeVec
+ mDisappearances *prometheus.CounterVec
}
type createCall struct {
Help: "Total memory on all cloud VMs.",
}, []string{"category"})
reg.MustRegister(wp.mMemory)
+ wp.mDisappearances = prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: "arvados",
+ Subsystem: "dispatchcloud",
+ Name: "instances_disappeared",
+ Help: "Number of occurrences of an instance disappearing from the cloud provider's list of instances.",
+ }, []string{"state"})
+ for _, v := range stateString {
+ wp.mDisappearances.WithLabelValues(v).Add(0)
+ }
+ reg.MustRegister(wp.mDisappearances)
}
func (wp *Pool) runMetrics() {
"WorkerState": wkr.state,
})
logger.Info("instance disappeared in cloud")
+ if wp.mDisappearances != nil {
+ wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc()
+ }
delete(wp.workers, id)
go wkr.Close()
notify = true
if self.pipeline:
self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
body={"state": "Failed"}).execute(num_retries=self.num_retries)
- if runtimeContext.submit and isinstance(tool, Runner):
- runnerjob = tool
- if runnerjob.uuid and self.work_api == "containers":
- self.api.container_requests().update(uuid=runnerjob.uuid,
- body={"priority": "0"}).execute(num_retries=self.num_retries)
+
+ if self.work_api == "containers" and not current_container:
+ # Not running in a crunch container, so cancel any outstanding processes.
+ for p in self.processes:
+ try:
+ self.api.container_requests().update(uuid=p,
+ body={"priority": "0"}
+ ).execute(num_retries=self.num_retries)
+ except Exception:
+ pass
finally:
self.workflow_eval_lock.release()
self.task_queue.drain()
if obj.get("location", "").startswith("keep:") and "listing" in obj:
del obj["listing"]
+# Matches "keep:<portable data hash>/<file path>" where a non-empty
+# path component is required.
+# NOTE(review): collection_pdh_path is not referenced anywhere in this
+# diff (matching below uses collection_pdh_pattern); confirm it is
+# used elsewhere before keeping it.
+collection_pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
+# Matches "keep:<portable data hash>" with an optional trailing path.
+collection_pdh_pattern = re.compile(r'^keep:([0-9a-f]{32}\+\d+)(/.*)?')
+# Matches "keep:<collection UUID>" (xxxxx-4zz18-...) with an optional
+# trailing path.
+collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$')
class ArvPathMapper(PathMapper):
"""Convert container-local paths to and from Keep collection ids."""
- pdh_path = re.compile(r'^keep:[0-9a-f]{32}\+\d+/.+$')
- pdh_dirpath = re.compile(r'^keep:[0-9a-f]{32}\+\d+(/.*)?$')
-
def __init__(self, arvrunner, referenced_files, input_basedir,
collection_pattern, file_pattern, name=None, single_collection=False):
self.arvrunner = arvrunner
if "#" in src:
src = src[:src.index("#")]
- if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
- self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
- if arvados_cwl.util.collectionUUID in srcobj:
- self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
-
debug = logger.isEnabledFor(logging.DEBUG)
+ if isinstance(src, basestring) and src.startswith("keep:"):
+ if collection_pdh_pattern.match(src):
+ self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
+ if arvados_cwl.util.collectionUUID in srcobj:
+ self.pdh_to_uuid[src.split("/", 1)[0][5:]] = srcobj[arvados_cwl.util.collectionUUID]
+ elif not collection_uuid_pattern.match(src):
+ with SourceLine(srcobj, "location", WorkflowException, debug):
+ raise WorkflowException("Invalid keep reference '%s'" % src)
+
if src not in self._pathmap:
if src.startswith("file:"):
# Local FS ref, may need to be uploaded or may be on keep
import ruamel.yaml as yaml
import arvados_cwl.arvdocker
-from .pathmapper import ArvPathMapper, trim_listing
+from .pathmapper import ArvPathMapper, trim_listing, collection_pdh_pattern, collection_uuid_pattern
from ._version import __version__
from . import done
from . context import ArvRuntimeContext
if isinstance(primary, (Mapping, Sequence)):
set_secondary(fsaccess, builder, inputschema, None, primary, discovered)
-collection_uuid_pattern = re.compile(r'^keep:([a-z0-9]{5}-4zz18-[a-z0-9]{15})(/.*)?$')
-collection_pdh_pattern = re.compile(r'^keep:([0-9a-f]{32}\+\d+)(/.*)?')
-
def upload_dependencies(arvrunner, name, document_loader,
workflowobj, uri, loadref_run,
include_primary=True, discovered_secondaryfiles=None):
--- /dev/null
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+ - class: InlineJavascriptRequirement
+arguments:
+ - ls
+ - -l
+ - $(inputs.hello)
+inputs:
+ hello:
+ type: File
+outputs: []
}
tool: 15241-writable-dir.cwl
doc: Test for writable collections
+
+- job: badkeep.yml
+ output: {}
+ should_fail: true
+ tool: 15295-bad-keep-ref.cwl
+ doc: Test checking for invalid keepref
--- /dev/null
+hello:
+ class: File
+ location: keep:/4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt
}
}
-type API struct {
- MaxItemsPerResponse int
- MaxRequestAmplification int
- RequestTimeout Duration
-}
-
type Cluster struct {
ClusterID string `json:"-"`
ManagementToken string
Containers ContainersConfig
RemoteClusters map[string]RemoteCluster
PostgreSQL PostgreSQL
- API API
- SystemLogs SystemLogs
- TLS TLS
+
+ API struct {
+ AsyncPermissionsUpdateInterval Duration
+ DisabledAPIs []string
+ MaxIndexDatabaseRead int
+ MaxItemsPerResponse int
+ MaxRequestAmplification int
+ MaxRequestSize int
+ RailsSessionSecretToken string
+ RequestTimeout Duration
+ }
+ AuditLogs struct {
+ MaxAge Duration
+ MaxDeleteBatch int
+ UnloggedAttributes []string
+ }
+ Collections struct {
+ BlobSigning bool
+ BlobSigningKey string
+ DefaultReplication int
+ BlobSigningTTL Duration
+ DefaultTrashLifetime Duration
+ TrashSweepInterval Duration
+ CollectionVersioning bool
+ PreserveVersionIfIdle Duration
+ }
+ Git struct {
+ Repositories string
+ }
+ Login struct {
+ ProviderAppSecret string
+ ProviderAppID string
+ }
+ Mail struct {
+ MailchimpAPIKey string
+ MailchimpListID string
+ SendUserSetupNotificationEmail string
+ IssueReporterEmailFrom string
+ IssueReporterEmailTo string
+ SupportEmailAddress string
+ EmailFrom string
+ }
+ SystemLogs struct {
+ LogLevel string
+ Format string
+ MaxRequestLogParamsSize int
+ }
+ TLS struct {
+ Certificate string
+ Key string
+ Insecure bool
+ }
+ Users struct {
+ AdminNotifierEmailFrom string
+ AutoAdminFirstUser bool
+ AutoAdminUserWithEmail string
+ AutoSetupNewUsers bool
+ AutoSetupNewUsersWithRepository bool
+ AutoSetupNewUsersWithVmUUID string
+ AutoSetupUsernameBlacklist []string
+ EmailSubjectPrefix string
+ NewInactiveUserNotificationRecipients []string
+ NewUserNotificationRecipients []string
+ NewUsersAreActive bool
+ UserNotifierEmailFrom string
+ UserProfileNotificationAddress string
+ }
+ Workbench struct {
+ ActivationContactLink string
+ APIClientConnectTimeout Duration
+ APIClientReceiveTimeout Duration
+ APIResponseCompression bool
+ ApplicationMimetypesWithViewIcon map[string]struct{}
+ ArvadosDocsite string
+ ArvadosPublicDataDocURL string
+ EnableGettingStartedPopup bool
+ EnablePublicProjectsPage bool
+ FileViewersConfigURL string
+ LogViewerMaxBytes ByteSize
+ MultiSiteSearch bool
+ Repositories bool
+ RepositoryCache string
+ RunningJobLogRecordsToFetch int
+ SecretKeyBase string
+ SecretToken string
+ ShowRecentCollectionsOnDashboard bool
+ ShowUserAgreementInline bool
+ ShowUserNotifications bool
+ SiteName string
+ Theme string
+ UserProfileFormFields map[string]struct {
+ Type string
+ FormFieldTitle string
+ FormFieldDescription string
+ Required bool
+ }
+ UserProfileFormMessage string
+ VocabularyURL string
+ }
}
type Services struct {
- Controller Service
- DispatchCloud Service
- Health Service
- Keepbalance Service
- Keepproxy Service
- Keepstore Service
- Nodemanager Service
- RailsAPI Service
- WebDAV Service
- Websocket Service
- Workbench1 Service
- Workbench2 Service
+ Composer Service
+ Controller Service
+ DispatchCloud Service
+ GitHTTP Service
+ GitSSH Service
+ Health Service
+ Keepbalance Service
+ Keepproxy Service
+ Keepstore Service
+ Nodemanager Service
+ RailsAPI Service
+ SSO Service
+ WebDAVDownload Service
+ WebDAV Service
+ WebShell Service
+ Websocket Service
+ Workbench1 Service
+ Workbench2 Service
}
type Service struct {
- InternalURLs map[URL]ServiceInstance `json:",omitempty"`
+ InternalURLs map[URL]ServiceInstance
ExternalURL URL
}
type ServiceInstance struct{}
-type SystemLogs struct {
- LogLevel string
- Format string
- MaxRequestLogParamsSize int
-}
-
type PostgreSQL struct {
Connection PostgreSQLConnection
ConnectionPool int
type PostgreSQLConnection map[string]string
type RemoteCluster struct {
- // API endpoint host or host:port; default is {id}.arvadosapi.com
- Host string
- // Perform a proxy request when a local client requests an
- // object belonging to this remote.
- Proxy bool
- // Scheme, default "https". Can be set to "http" for testing.
- Scheme string
- // Disable TLS verify. Can be set to true for testing.
- Insecure bool
+ Host string
+ Proxy bool
+ Scheme string
+ Insecure bool
+ ActivateUsers bool
}
type InstanceType struct {
}
type ContainersConfig struct {
- CloudVMs CloudVMsConfig
- DispatchPrivateKey string
- StaleLockTimeout Duration
+ CloudVMs CloudVMsConfig
+ DefaultKeepCacheRAM ByteSize
+ DispatchPrivateKey string
+ LogReuseDecisions bool
+ MaxComputeVMs int
+ MaxDispatchAttempts int
+ MaxRetryAttempts int
+ StaleLockTimeout Duration
+ SupportedDockerImageFormats []string
+ UsePreemptibleInstances bool
+
+ JobsAPI struct {
+ Enable string
+ GitInternalDir string
+ DefaultDockerImage string
+ CrunchJobWrapper string
+ CrunchJobUser string
+ CrunchRefreshTrigger string
+ ReuseJobIfOutputsDiffer bool
+ }
+ Logging struct {
+ MaxAge Duration
+ LogBytesPerEvent int
+ LogSecondsBetweenEvents int
+ LogThrottlePeriod Duration
+ LogThrottleBytes int
+ LogThrottleLines int
+ LimitLogBytesPerJob int
+ LogPartialLineThrottlePeriod Duration
+ LogUpdatePeriod Duration
+ LogUpdateSize ByteSize
+ }
+ SLURM struct {
+ Managed struct {
+ DNSServerConfDir string
+ DNSServerConfTemplate string
+ DNSServerReloadCommand string
+ DNSServerUpdateCommand string
+ ComputeNodeDomain string
+ ComputeNodeNameservers []string
+ AssignNodeHostname string
+ }
+ }
}
type CloudVMsConfig struct {
ServiceNameKeepstore: svcs.Keepstore,
}
}
-
-type TLS struct {
- Certificate string
- Key string
- Insecure bool
-}
if data[0] == '"' {
return d.Set(string(data[1 : len(data)-1]))
}
- return fmt.Errorf("duration must be given as a string like \"600s\" or \"1h30m\"")
+ // Mimic error message returned by ParseDuration for a number
+ // without units.
+ return fmt.Errorf("missing unit in duration %s", data)
}
// MarshalJSON implements json.Marshaler.
c.Check(string(buf), check.Equals, `"`+trial.out+`"`)
}
}
+
+// TestUnmarshalJSON checks that Duration rejects bare JSON numbers
+// and unitless numeric strings with a ParseDuration-style "missing
+// unit" error, rejects non-duration strings, and accepts strings
+// with units.
+func (s *DurationSuite) TestUnmarshalJSON(c *check.C) {
+	var d struct {
+		D Duration
+	}
+	err := json.Unmarshal([]byte(`{"D":1.234}`), &d)
+	c.Check(err, check.ErrorMatches, `missing unit in duration 1.234`)
+	err = json.Unmarshal([]byte(`{"D":"1.234"}`), &d)
+	c.Check(err, check.ErrorMatches, `.*missing unit in duration 1.234`)
+	err = json.Unmarshal([]byte(`{"D":"1"}`), &d)
+	c.Check(err, check.ErrorMatches, `.*missing unit in duration 1`)
+	err = json.Unmarshal([]byte(`{"D":"foobar"}`), &d)
+	c.Check(err, check.ErrorMatches, `.*invalid duration foobar`)
+	err = json.Unmarshal([]byte(`{"D":"60s"}`), &d)
+	c.Check(err, check.IsNil)
+	c.Check(d.D.Duration(), check.Equals, time.Minute)
+}
else:
try:
save_version(setup_dir, module, git_latest_tag() + git_timestamp_tag())
- except subprocess.CalledProcessError:
+ except (subprocess.CalledProcessError, OSError):
pass
return read_version(setup_dir, module)
def update
@object.with_lock do
- @object.reload
super
end
end
super
if action_name == 'lock' || action_name == 'unlock'
# Avoid loading more fields than we need
- @objects = @objects.select(:id, :uuid, :state, :priority, :auth_uuid, :locked_by_uuid)
+ @objects = @objects.select(:id, :uuid, :state, :priority, :auth_uuid, :locked_by_uuid, :lock_count)
@select = %w(uuid state priority auth_uuid locked_by_uuid)
end
end
end.map(&:name)
end
+  # Returns an array of SQL "coalesce(col,'')" expressions, one per
+  # full-text-searchable column, casting jsonb and serialized columns
+  # to text so they can be concatenated into a single search string.
+  def self.full_text_coalesce
+    full_text_searchable_columns.collect do |column|
+      is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb
+      cast = (is_jsonb || serialized_attributes[column]) ? '::text' : ''
+      "coalesce(#{column}#{cast},'')"
+    end
+  end
+
+  # SQL expression concatenating all searchable columns separated by
+  # spaces; used both to build the trigram GIN index (see the
+  # AddTrigramIndexForTextSearch migration) and in ILIKE queries that
+  # must match the index expression exactly.
+  def self.full_text_trgm
+    "(#{full_text_coalesce.join(" || ' ' || ")})"
+  end
+
def self.full_text_tsvector
parts = full_text_searchable_columns.collect do |column|
is_jsonb = self.columns.select{|x|x.name == column}[0].type == :jsonb
before_validation :fill_field_defaults, :if => :new_record?
before_validation :set_timestamps
+ before_validation :check_lock
+ before_validation :check_unlock
validates :command, :container_image, :output_path, :cwd, :priority, { presence: true }
validates :priority, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
validate :validate_runtime_status
t.add :scheduling_parameters
t.add :runtime_user_uuid
t.add :runtime_auth_scopes
+ t.add :lock_count
end
# Supported states for a container
nil
end
- def check_lock_fail
- if self.state != Queued
- raise LockFailedError.new("cannot lock when #{self.state}")
- elsif self.priority <= 0
- raise LockFailedError.new("cannot lock when priority<=0")
+  # API action: take a database row lock and transition the container
+  # from Queued to Locked. Raises LockFailedError in any other state.
+  # The priority>0 check and lock_count increment happen in the
+  # check_lock before_validation callback during update_attributes!.
+  def lock
+    self.with_lock do
+      if self.state != Queued
+        raise LockFailedError.new("cannot lock when #{self.state}")
+      end
+      self.update_attributes!(state: Locked)
    end
  end
- def lock
- # Check invalid state transitions once before getting the lock
- # (because it's cheaper that way) and once after getting the lock
- # (because state might have changed while acquiring the lock).
- check_lock_fail
- transaction do
- reload
- check_lock_fail
- update_attributes!(state: Locked, lock_count: self.lock_count+1)
+  # before_validation hook for the Queued -> Locked transition:
+  # refuses to lock a container whose priority is not positive, and
+  # increments lock_count (check_unlock uses it to cap dispatch
+  # retries against Containers.MaxDispatchAttempts).
+  def check_lock
+    if state_was == Queued and state == Locked
+      if self.priority <= 0
+        raise LockFailedError.new("cannot lock when priority<=0")
+      end
+      self.lock_count = self.lock_count+1
    end
  end
- def check_unlock_fail
- if self.state != Locked
- raise InvalidStateTransitionError.new("cannot unlock when #{self.state}")
- elsif self.locked_by_uuid != current_api_client_authorization.uuid
- raise InvalidStateTransitionError.new("locked by a different token")
+  # API action: take a database row lock and transition the container
+  # from Locked back to Queued. Raises InvalidStateTransitionError in
+  # any other state. Token ownership and the MaxDispatchAttempts
+  # cutoff are enforced by the check_unlock before_validation
+  # callback during update_attributes!.
+  def unlock
+    self.with_lock do
+      if self.state != Locked
+        raise InvalidStateTransitionError.new("cannot unlock when #{self.state}")
+      end
+      self.update_attributes!(state: Queued)
    end
  end
- def unlock
- # Check invalid state transitions twice (see lock)
- check_unlock_fail
- transaction do
- reload(lock: 'FOR UPDATE')
- check_unlock_fail
- if self.lock_count < Rails.configuration.Containers.MaxDispatchAttempts
- update_attributes!(state: Queued)
- else
- update_attributes!(state: Cancelled,
- runtime_status: {
- error: "Container exceeded 'max_container_dispatch_attempts' (lock_count=#{self.lock_count}."
- })
+  # before_validation hook for the Locked -> Queued transition: only
+  # the token that holds the lock may unlock (PermissionDeniedError
+  # maps to HTTP 403 -- see the updated controller test). If the
+  # container has already been locked MaxDispatchAttempts times,
+  # cancel it instead of requeueing, recording the reason in
+  # runtime_status.
+  def check_unlock
+    if state_was == Locked and state == Queued
+      if self.locked_by_uuid != current_api_client_authorization.uuid
+        raise ArvadosModel::PermissionDeniedError.new("locked by a different token")
+      end
+      if self.lock_count >= Rails.configuration.Containers.MaxDispatchAttempts
+        self.state = Cancelled
+        self.runtime_status = {error: "Failed to start container. Cancelled after exceeding 'Containers.MaxDispatchAttempts' (lock_count=#{self.lock_count})"}
      end
    end
  end
--- /dev/null
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+class AddTrigramIndexForTextSearch < ActiveRecord::Migration[5.0]
+ def trgm_indexes
+ {
+ "collections" => "collections_trgm_text_search_idx",
+ "container_requests" => "container_requests_trgm_text_search_idx",
+ "groups" => "groups_trgm_text_search_idx",
+ "jobs" => "jobs_trgm_text_search_idx",
+ "pipeline_instances" => "pipeline_instances_trgm_text_search_idx",
+ "pipeline_templates" => "pipeline_templates_trgm_text_search_idx",
+ "workflows" => "workflows_trgm_text_search_idx",
+ }
+ end
+
+ def up
+ begin
+ execute "CREATE EXTENSION IF NOT EXISTS pg_trgm"
+ rescue ActiveRecord::StatementInvalid => e
+ puts "Cannot create the pg_trgm extension."
+ if e.cause.is_a?(PG::InsufficientPrivilege)
+ puts "The user must have a SUPERUSER role."
+ elsif e.cause.is_a?(PG::UndefinedFile)
+ puts "The postgresql-contrib package is most likely not installed."
+ else
+ puts "Unknown Error."
+ end
+ puts "Please visit https://doc.arvados.org/admin/upgrading.html for instructions on how to run this migration."
+ throw e
+ end
+
+ trgm_indexes.each do |model, indx|
+ execute "CREATE INDEX #{indx} ON #{model} USING gin((#{model.classify.constantize.full_text_trgm}) gin_trgm_ops)"
+ end
+ end
+
+ def down
+ trgm_indexes.each do |_, indx|
+ execute "DROP INDEX IF EXISTS #{indx}"
+ end
+ end
+end
SET standard_conforming_strings = on;
SELECT pg_catalog.set_config('search_path', '', false);
SET check_function_bodies = false;
+SET xmloption = content;
SET client_min_messages = warning;
--
-- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
+--
+-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: -
+--
+
+CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public;
+
+
+--
+-- Name: EXTENSION pg_trgm; Type: COMMENT; Schema: -; Owner: -
+--
+
+-- COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams';
+
+
SET default_tablespace = '';
SET default_with_oids = false;
CREATE INDEX collections_search_index ON public.collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, current_version_uuid);
+--
+-- Name: collections_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX collections_trgm_text_search_idx ON public.collections USING gin (((((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || COALESCE(file_names, ''::text))) public.gin_trgm_ops);
+
+
--
-- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX container_requests_search_index ON public.container_requests USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, state, requesting_container_uuid, container_uuid, container_image, cwd, output_path, output_uuid, log_uuid, output_name);
+--
+-- Name: container_requests_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX container_requests_trgm_text_search_idx ON public.container_requests USING gin (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE((properties)::text, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)) public.gin_trgm_ops);
+
+
--
-- Name: containers_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX groups_search_index ON public.groups USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, group_class);
+--
+-- Name: groups_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX groups_trgm_text_search_idx ON public.groups USING gin (((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text) || ' '::text) || COALESCE((properties)::text, ''::text))) public.gin_trgm_ops);
+
+
--
-- Name: humans_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX jobs_search_index ON public.jobs USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, submit_id, script, script_version, cancelled_by_client_uuid, cancelled_by_user_uuid, output, is_locked_by_uuid, log, repository, supplied_script_version, docker_image_locator, state, arvados_sdk_version);
+--
+-- Name: jobs_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX jobs_trgm_text_search_idx ON public.jobs USING gin (((((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))) public.gin_trgm_ops);
+
+
--
-- Name: keep_disks_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX pipeline_instances_search_index ON public.pipeline_instances USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, pipeline_template_uuid, name, state);
+--
+-- Name: pipeline_instances_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX pipeline_instances_trgm_text_search_idx ON public.pipeline_instances USING gin (((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops);
+
+
--
-- Name: pipeline_template_owner_uuid_name_unique; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX pipeline_templates_search_index ON public.pipeline_templates USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name);
+--
+-- Name: pipeline_templates_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX pipeline_templates_trgm_text_search_idx ON public.pipeline_templates USING gin (((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)) public.gin_trgm_ops);
+
+
--
-- Name: repositories_search_index; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX workflows_search_idx ON public.workflows USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name);
+--
+-- Name: workflows_trgm_text_search_idx; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX workflows_trgm_text_search_idx ON public.workflows USING gin (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text))) public.gin_trgm_ops);
+
+
--
-- PostgreSQL database dump complete
--
('20181213183234'),
('20190214214814'),
('20190322174136'),
-('20190422144631');
+('20190422144631'),
+('20190523180148');
if cfg[k].is_a? Integer
cfg[k] = cfg[k].seconds
elsif cfg[k].is_a? String
- cfg[k] = ConfigLoader.parse_duration cfg[k]
+ cfg[k] = ConfigLoader.parse_duration(cfg[k], cfgkey: cfgkey)
end
end
cfg[k] = URI(cfg[k])
end
+ if cfgtype == Integer && cfg[k].is_a?(String)
+ v = cfg[k].sub(/B\s*$/, '')
+ if mt = /(-?\d*\.?\d+)\s*([KMGTPE]i?)$/.match(v)
+ if mt[1].index('.')
+ v = mt[1].to_f
+ else
+ v = mt[1].to_i
+ end
+ cfg[k] = v * {
+ 'K' => 1000,
+ 'Ki' => 1 << 10,
+ 'M' => 1000000,
+ 'Mi' => 1 << 20,
+ "G" => 1000000000,
+ "Gi" => 1 << 30,
+ "T" => 1000000000000,
+ "Ti" => 1 << 40,
+ "P" => 1000000000000000,
+ "Pi" => 1 << 50,
+ "E" => 1000000000000000000,
+ "Ei" => 1 << 60,
+ }[mt[2]]
+ end
+ end
+
if !cfg[k].is_a? cfgtype
raise "#{cfgkey} expected #{cfgtype} but was #{cfg[k].class}"
end
end
end
- def self.parse_duration durstr
- duration_re = /(\d+(\.\d+)?)(s|m|h)/
+ def self.parse_duration durstr, cfgkey:
+ duration_re = /-?(\d+(\.\d+)?)(s|m|h)/
dursec = 0
while durstr != ""
mt = duration_re.match durstr
if !mt
- raise "#{cfgkey} not a valid duration: '#{cfg[k]}', accepted suffixes are s, m, h"
+ raise "#{cfgkey} not a valid duration: '#{durstr}', accepted suffixes are s, m, h"
end
multiplier = {s: 1, m: 60, h: 3600}
dursec += (Float(mt[1]) * multiplier[mt[3].to_sym])
cond_out = []
+ if attrs_in == 'any' && (operator.casecmp('ilike').zero? || operator.casecmp('like').zero?) && (operand.is_a? String) && operand.match('^[%].*[%]$')
+ # Trigram index search
+ cond_out << model_class.full_text_trgm + " #{operator} ?"
+ param_out << operand
+ # Skip the generic per-column operator loop below
+ attrs = []
+ end
+
if operator == '@@'
# Full-text search
if attrs_in != 'any'
priority: 2
created_at: <%= 2.minute.ago.to_s(:db) %>
updated_at: <%= 2.minute.ago.to_s(:db) %>
+ modified_at: <%= 2.minute.ago.to_s(:db) %>
container_image: test
cwd: test
output_path: test
authorize_with :dispatch2
uuid = containers(:locked).uuid
post :unlock, params: {id: uuid}
- assert_response 422
+ assert_response 403
end
[
end
end
+ [
+ %w[collections collections_trgm_text_search_idx],
+ %w[container_requests container_requests_trgm_text_search_idx],
+ %w[groups groups_trgm_text_search_idx],
+ %w[jobs jobs_trgm_text_search_idx],
+ %w[pipeline_instances pipeline_instances_trgm_text_search_idx],
+ %w[pipeline_templates pipeline_templates_trgm_text_search_idx],
+ %w[workflows workflows_trgm_text_search_idx]
+ ].each do |model|
+ table = model[0]
+ indexname = model[1]
+ test "trigram index exists on #{table} model" do
+ table_class = table.classify.constantize
+ expect = table_class.full_text_searchable_columns
+ ok = false
+ conn = ActiveRecord::Base.connection
+ conn.exec_query("SELECT indexdef FROM pg_indexes WHERE tablename = '#{table}' AND indexname = '#{indexname}'").each do |res|
+ searchable = res['indexdef'].scan(/COALESCE\(+([A-Za-z_]+)/).flatten
+ ok = (expect == searchable)
+ assert ok, "Invalid or no trigram index on #{table} named #{indexname}\nexpect: #{expect.inspect}\nfound: #{searchable}"
+ end
+ end
+ end
+
test "selectable_attributes includes database attributes" do
assert_includes(Job.selectable_attributes, "success")
end
/var/lib/arvbox/service/api/run-service --only-setup
fi
+  # Pass --interactive to run-tests.sh only when the user gave no
+  # extra arguments. Use $# (argument count): the previous
+  # `[[ -z "$@" ]]` is a conditional-expression error when more than
+  # one argument is supplied, and tests emptiness rather than intent.
+  interactive=""
+  if [[ $# -eq 0 ]] ; then
+    interactive=--interactive
+  fi
+
docker exec -ti \
-e LINES=$(tput lines) \
-e COLUMNS=$(tput cols) \
/usr/local/lib/arvbox/runsu.sh \
/usr/src/arvados/build/run-tests.sh \
--temp /var/lib/arvados/test \
+ $interactive \
"$@"
elif [[ "$CONFIG" = devenv ]] ; then
if [[ $need_setup = 1 ]] ; then
RUN apt-get update && \
apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
- postgresql-9.6 git build-essential runit curl libpq-dev \
+ postgresql-9.6 postgresql-contrib-9.6 git build-essential runit curl libpq-dev \
libcurl4-openssl-dev libssl1.0-dev zlib1g-dev libpcre3-dev \
openssh-server python-setuptools netcat-traditional \
python-epydoc graphviz bzip2 less sudo virtualenv \
if ! (psql postgres -c "\du" | grep "^ arvados ") >/dev/null ; then
psql postgres -c "create user arvados with password '$database_pw'"
- psql postgres -c "ALTER USER arvados CREATEDB;"
fi
+psql postgres -c "ALTER USER arvados WITH SUPERUSER;"
sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml