Merge branch '15106-trgm-text-search'
authorEric Biagiotti <ebiagiotti@veritasgenetics.com>
Tue, 18 Jun 2019 14:06:43 +0000 (10:06 -0400)
committerEric Biagiotti <ebiagiotti@veritasgenetics.com>
Tue, 18 Jun 2019 14:06:43 +0000 (10:06 -0400)
refs #15106

Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

52 files changed:
.licenseignore
apps/workbench/app/views/projects/_container_summary.html.erb [new file with mode: 0644]
apps/workbench/app/views/projects/_show_dashboard.html.erb
doc/_includes/_federated_cwl.liquid
doc/install/crunch2-slurm/install-test.html.textile.liquid
doc/install/install-controller.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/sdk/python/sdk-python.html.textile.liquid
doc/user/cwl/federated-workflows.html.textile.liquid
doc/user/cwl/federated/FileOnCluster.yml [new file with mode: 0644]
doc/user/cwl/federated/cat.cwl [deleted file]
doc/user/cwl/federated/colors_to_select.txt [new file with mode: 0644]
doc/user/cwl/federated/extract.cwl [new file with mode: 0644]
doc/user/cwl/federated/extract.py [new file with mode: 0644]
doc/user/cwl/federated/feddemo.cwl [moved from doc/user/cwl/federated/federated.cwl with 51% similarity]
doc/user/cwl/federated/file-on-clsr1.dat [deleted file]
doc/user/cwl/federated/file-on-clsr2.dat [deleted file]
doc/user/cwl/federated/file-on-clsr3.dat [deleted file]
doc/user/cwl/federated/items1.csv [new file with mode: 0644]
doc/user/cwl/federated/items2.csv [new file with mode: 0644]
doc/user/cwl/federated/items3.csv [new file with mode: 0644]
doc/user/cwl/federated/md5sum.cwl [deleted file]
doc/user/cwl/federated/merge.cwl [new file with mode: 0644]
doc/user/cwl/federated/merge.py [new file with mode: 0644]
doc/user/cwl/federated/shards.yml
doc/user/topics/keep.html.textile.liquid
doc/user/topics/tutorial-gatk-variantfiltration.html.textile.liquid
doc/user/topics/tutorial-job1.html.textile.liquid
doc/user/topics/tutorial-parallel.html.textile.liquid
doc/user/tutorials/tutorial-keep.html.textile.liquid
lib/config/cmd.go
lib/config/cmd_test.go
lib/config/config.default.yml
lib/config/export.go [new file with mode: 0644]
lib/config/export_test.go [new file with mode: 0644]
lib/config/generated_config.go
lib/config/load_test.go
lib/controller/federation_test.go
lib/controller/handler.go
lib/controller/handler_test.go
lib/controller/server_test.go
lib/dispatchcloud/dispatcher.go
lib/dispatchcloud/dispatcher_test.go
lib/dispatchcloud/driver.go
lib/dispatchcloud/worker/pool.go
sdk/go/arvados/config.go
sdk/go/arvados/duration.go
sdk/go/arvados/duration_test.go
sdk/python/arvados_version.py
services/api/lib/config_loader.rb
services/api/test/fixtures/containers.yml
tools/arvbox/bin/arvbox

index a9b6f5f6cafdbedb1a202f5f9c0e3526e7a54549..28ddf9c290a2a77adcb1f60b8ecbb806a81d48fd 100644 (file)
@@ -15,6 +15,7 @@ build/package-test-dockerfiles/ubuntu1604/etc-apt-preferences.d-arvados
 doc/fonts/*
 doc/_includes/_config_default_yml.liquid
 doc/user/cwl/federated/*
+doc/_includes/_federated_cwl.liquid
 */docker_image
 docker/jobs/apt.arvados.org*.list
 docker/jobs/1078ECD7.key
diff --git a/apps/workbench/app/views/projects/_container_summary.html.erb b/apps/workbench/app/views/projects/_container_summary.html.erb
new file mode 100644 (file)
index 0000000..2df4d81
--- /dev/null
@@ -0,0 +1,46 @@
+<%# Copyright (C) The Arvados Authors. All rights reserved.
+
+SPDX-License-Identifier: AGPL-3.0 %>
+
+<%# Dashboard summary of container activity: counts of pending and running
+    containers, plus links to the oldest pending and longest-running ones. %>
+<div class="compute-summary-numbers">
+  <table>
+      <colgroup>
+        <col width="50%">
+        <col width="50%">
+      </colgroup>
+      <tr>
+        <th>Pending containers</th>
+        <th>Running containers</th>
+      </tr>
+      <tr>
+        <%# limit(1): only the first match of each query is linked below; items_available is rendered as the count. %>
+        <% pending_containers = Container.order("created_at asc").filter([["state", "in", ["Queued", "Locked"]], ["priority", ">", 0]]).limit(1) %>
+        <% running_containers = Container.order("started_at asc").where(state: "Running").limit(1) %>
+        <td><%= pending_containers.items_available %></td>
+        <td><%= running_containers.items_available %></td>
+      </tr>
+      <tr>
+        <th>Oldest pending</th>
+        <th>Longest running</th>
+      </tr>
+      <tr>
+        <td><% if pending_containers.first then %>
+            <%= link_to_if_arvados_object pending_containers.first, link_text: render_runtime(Time.now - pending_containers.first.created_at, false, false) %>
+          <% else %>
+            -
+          <% end %>
+        </td>
+
+        <td><% if running_containers.first then %>
+            <%# Running time is measured from started_at, matching the sort order of the query above. %>
+            <%= link_to_if_arvados_object running_containers.first, link_text: render_runtime(Time.now - running_containers.first.started_at, false, false) %>
+          <% else %>
+            -
+          <% end %>
+        </td>
+      </tr>
+    </table>
+
+</div>
index d4ea2de155af7384a844d0c4593314ebd201867c..22d89fff6d8e9133b310f9a6efcc2a4d22040c69 100644 (file)
@@ -186,6 +186,16 @@ SPDX-License-Identifier: AGPL-3.0 %>
           </div>
         </div>
       </div>
+      <% end %>
+       <% if Container.api_exists?(:index) %>
+      <div class="panel panel-default" style="min-height: 10.5em">
+        <div class="panel-heading"><span class="panel-title">Container status</span></div>
+        <div class="panel-body containers-summary-pane">
+          <div>
+            <%= render partial: 'container_summary' %>
+         </div>
+       </div>
+      </div>
       <% end %>
       <% if Rails.configuration.show_recent_collections_on_dashboard %>
       <div class="panel panel-default">
index 59a629c5acf0d6db2ff8d9121fb8f16b26b5a715..cfe8407e2f75af222e5ffe34de0dd10d65b8d6fc 120000 (symlink)
@@ -1 +1 @@
-../user/cwl/federated/federated.cwl
\ No newline at end of file
+../user/cwl/federated/feddemo.cwl
\ No newline at end of file
index ca509bb7dcb4eeeb79a8a6c4a68c90b5b65457f1..03a5d18b4503aa32c02448dd82a6158a149f38c5 100644 (file)
@@ -106,7 +106,7 @@ You can use standard Keep tools to view the container's output and logs from the
 ./crunch-run.txt
 ./stderr.txt
 ./stdout.txt
-~$ <span class="userinput">arv keep get <b>a01df2f7e5bc1c2ad59c60a837e90dc6+166</b>/stdout.txt</span>
+~$ <span class="userinput">arv-get <b>a01df2f7e5bc1c2ad59c60a837e90dc6+166</b>/stdout.txt</span>
 2016-08-05T13:53:06.201011Z Hello, Crunch!
 </code></pre>
 </notextile>
index 394aa0fdf7801c074874cbbd500c07b6f5870f5b..f78467f5bebf60604e3aeba78c7a36d700ef94ce 100644 (file)
@@ -179,3 +179,19 @@ Confirm the service is listening on its assigned port and responding to requests
 {"errors":["Forbidden"],"error_token":"1533044555+684b532c"}
 </code></pre>
 </notextile>
+
+h3(#confirm-config). Confirm the public configuration is OK
+
+Confirm the publicly accessible configuration endpoint does not reveal any sensitive information (e.g., a secret that was mistakenly entered under the wrong configuration key). Use the jq program, if you have installed it, to make the JSON document easier to read.
+
+<notextile>
+<pre><code>~$ <span class="userinput">curl http://0.0.0.0:<b>9004</b>/arvados/v1/config | jq .</span>
+{
+  "API": {
+    "MaxItemsPerResponse": 1000,
+    "MaxRequestAmplification": 4,
+    "RequestTimeout": "5m"
+  },
+  ...
+</code></pre>
+</notextile>
index f9652ef3861502b135689ba9730fb1585732ef09..b23ec46fa75f35f1955aea204d3f01bd6407f365 100644 (file)
@@ -140,6 +140,8 @@ arv tag remove --all
 
 h3(#arv-ws). arv ws
 
+This is a frontend to @arv-ws@.
+
 @arv ws@ provides access to the websockets event stream.
 
 <notextile>
@@ -168,7 +170,7 @@ optional arguments:
 
 h3(#arv-keep). arv keep
 
-@arv keep@ provides access to the Keep storage service.
+@arv keep@ provides commands for accessing the Keep storage service.
 
 <notextile>
 <pre>
@@ -182,6 +184,8 @@ Available methods: ls, get, put, docker
 
 h3(#arv-keep-ls). arv keep ls
 
+This is a frontend to @arv-ls@.
+
 <notextile>
 <pre>
 $ <code class="userinput">arv keep ls --help</code>
@@ -203,6 +207,8 @@ optional arguments:
 
 h3(#arv-keep-get). arv keep get
 
+This is a frontend to @arv-get@.
+
 <notextile>
 <pre>
 $ <code class="userinput">arv keep get --help</code>
@@ -256,6 +262,8 @@ optional arguments:
 
 h3(#arv-keep-put). arv keep put
 
+This is a frontend to @arv-put@.
+
 <notextile>
 <pre>
 $ <code class="userinput">arv keep put --help</code>
@@ -335,6 +343,8 @@ optional arguments:
 
 h3(#arv-pipeline-run). arv pipeline run
 
+WARNING: this uses the obsolete "job" API.  Don't use this.  You should use @arvados-cwl-runner@ instead.
+
 @arv pipeline run@ can be used to start a pipeline run from the command line.
 
 The User Guide has a page with a bit more information on "using arv pipeline run":{{site.baseurl}}/user/topics/running-pipeline-command-line.html.
@@ -381,6 +391,8 @@ Options:
 
 h3(#arv-run). arv run
 
+WARNING: this uses the obsolete "job" API.  Don't use this.  You should use @arvados-cwl-runner@ instead.
+
 The @arv-run@ command creates Arvados pipelines at the command line that fan out to multiple concurrent tasks across Arvados compute nodes.
 
 The User Guide has a page on "using arv-run":{{site.baseurl}}/user/topics/arv-run.html.
index 9c0ec475b4328eda8875b161bbb77795ddbb2d53..c8b2b67b111166e0662e4c70e431c10a4ea0a3f1 100644 (file)
@@ -24,7 +24,13 @@ h3. Option 1: Install with pip
 
 This installation method is recommended to make the SDK available for use in your own Python programs. It can coexist with the system-wide installation method from a distribution package (option 2, below).
 
-Run @pip install arvados-python-client@ in an appropriate installation environment, such as a virtualenv.
+Run @pip install arvados-python-client@ in an appropriate installation environment, such as a @virtualenv@.
+
+The SDK uses @pycurl@ which depends on the @libcurl@ C library.  To build the module you may have to install additional packages.  On Debian 9 this is:
+
+<pre>
+$ apt-get install git build-essential python3-dev libcurl4-openssl-dev libssl1.0-dev
+</pre>
 
 If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, try @pip install --pre arvados-python-client@.
 
index 7e2150dccb20e7784785b36f6bd4abe1905ff25a..01d656dd1519ffa337d10c9bc1ce047c6a133f2e 100644 (file)
@@ -36,7 +36,7 @@ At this time, remote steps of a workflow on Workbench are not displayed.  As a w
 Run it like any other workflow:
 
 <notextile>
-<pre><code>~$ <span class="userinput">arvados-cwl-runner federated.cwl shards.cwl</span>
+<pre><code>~$ <span class="userinput">arvados-cwl-runner feddemo.cwl shards.yml</span>
 </code></pre>
 </notextile>
 
diff --git a/doc/user/cwl/federated/FileOnCluster.yml b/doc/user/cwl/federated/FileOnCluster.yml
new file mode 100644 (file)
index 0000000..363d071
--- /dev/null
@@ -0,0 +1,5 @@
+name: FileOnCluster
+type: record
+fields:
+  file: File
+  cluster: string
\ No newline at end of file
diff --git a/doc/user/cwl/federated/cat.cwl b/doc/user/cwl/federated/cat.cwl
deleted file mode 100644 (file)
index 17132fe..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-cwlVersion: v1.0
-class: CommandLineTool
-inputs:
-  inp:
-    type: File[]
-    inputBinding: {}
-outputs:
-  joined: stdout
-stdout: joined.txt
-baseCommand: cat
diff --git a/doc/user/cwl/federated/colors_to_select.txt b/doc/user/cwl/federated/colors_to_select.txt
new file mode 100644 (file)
index 0000000..620b008
--- /dev/null
@@ -0,0 +1,2 @@
+green
+blue
diff --git a/doc/user/cwl/federated/extract.cwl b/doc/user/cwl/federated/extract.cwl
new file mode 100644 (file)
index 0000000..f8fdedb
--- /dev/null
@@ -0,0 +1,22 @@
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  SchemaDefRequirement:
+    types:
+      - $import: FileOnCluster.yml
+inputs:
+  select_column: string
+  select_values: File
+  dataset: 'FileOnCluster.yml#FileOnCluster'
+  extract_py:
+    type: File
+    default:
+      class: File
+      location: extract.py
+outputs:
+  out:
+    type: File
+    outputBinding:
+      glob: extracted.csv
+
+arguments: [python, $(inputs.extract_py), $(inputs.select_column), $(inputs.select_values), $(inputs.dataset.file), $(inputs.dataset.cluster)]
diff --git a/doc/user/cwl/federated/extract.py b/doc/user/cwl/federated/extract.py
new file mode 100644 (file)
index 0000000..2d2c49d
--- /dev/null
@@ -0,0 +1,45 @@
+"""Select rows from a CSV dataset by the value of one column.
+
+Usage: extract.py <select_column> <select_values> <dataset> <cluster>
+
+Writes "extracted.csv" in the current directory. It holds the dataset's
+header and every row whose <select_column> value is listed (one per line)
+in the <select_values> file, each prefixed with a "cluster" column.
+"""
+import csv
+import sys
+
+select_column = sys.argv[1]
+select_values = sys.argv[2]
+dataset = sys.argv[3]
+cluster = sys.argv[4]
+
+# Context managers close each file deterministically instead of leaving
+# open handles to be cleaned up at interpreter exit.
+with open(select_values, "rt") as sv:
+    selectvals = [s.strip() for s in sv]
+
+print("selectvals", selectvals)
+
+with open(dataset, "rt") as dsfile:
+    ds = csv.reader(dsfile)
+    header = next(ds)
+    print("header is", header)
+    columnindex = None
+    for i, v in enumerate(header):
+        if v == select_column:
+            columnindex = i
+    if columnindex is None:
+        raise Exception("Column %s not found" % select_column)
+
+    print("column index", columnindex)
+
+    # Open the output only after the column check, so a failed run does
+    # not leave an empty extracted.csv behind.
+    with open("extracted.csv", "wt") as exfile:
+        ex = csv.writer(exfile)
+        ex.writerow(["cluster"] + list(header))
+
+        for row in ds:
+            if row[columnindex] in selectvals:
+                ex.writerow([cluster] + list(row))
similarity index 51%
rename from doc/user/cwl/federated/federated.cwl
rename to doc/user/cwl/federated/feddemo.cwl
index 5314a7675b2e6f64c08351cec9e2ccb893a77bab..a68ff444a67dd40db13b6aa47dce20e8afc67922 100644 (file)
@@ -1,8 +1,11 @@
-#
-# Demonstrate Arvados federation features.  This performs a parallel
-# scatter over some arbitrary number of files and federated clusters,
-# then joins the results.
-#
+# Demonstrate Arvados federation features.  This example searches a
+# list of CSV files that are hosted on different Arvados clusters.
+# For each file, send a task to the remote cluster which will scan
+# the file and extract the rows where the column "select_column" has one
+# of the values appearing in the "select_values" file.  The home
+# cluster then runs a task which pulls the results from the remote
+# clusters and merges the results to produce a final report.
+
 cwlVersion: v1.0
 class: Workflow
 $namespaces:
@@ -19,50 +22,45 @@ requirements:
     dockerPull: arvados/jobs
 
   # Define a record type so we can conveniently associate the input
-  # file, the cluster on which the file lives, and the project on that
-  # cluster that will own the container requests and intermediate
-  # outputs.
+  # file and the cluster where the task should run.
   SchemaDefRequirement:
     types:
-      - name: FileOnCluster
-        type: record
-        fields:
-          file: File
-          cluster: string
-          project: string
+      - $import: FileOnCluster.yml
 
 inputs:
-  # Expect an array of FileOnCluster records (defined above)
-  # as our input.
-  shards:
+  select_column: string
+  select_values: File
+
+  datasets:
     type:
       type: array
-      items: FileOnCluster
+      items: FileOnCluster.yml#FileOnCluster
+
+  intermediate_projects: string[]
 
 outputs:
   # Will produce an output file with the results of the distributed
-  # analysis jobs joined together.
+  # analysis jobs merged together.
   joined:
     type: File
-    outputSource: gather-results/joined
+    outputSource: gather-results/out
 
 steps:
   distributed-analysis:
     in:
-      # Take "shards" array as input, we scatter over it below.
-      shard: shards
-
-      # Use an expression to extract the "file" field to assign to the
-      # "inp" parameter of the tool.
-      inp: {valueFrom: $(inputs.shard.file)}
+      select_column: select_column
+      select_values: select_values
+      dataset: datasets
+      intermediate_projects: intermediate_projects
 
     # Scatter over shards, this means creating a parallel job for each
     # element in the "shards" array.  Expressions are evaluated for
     # each element.
-    scatter: shard
+    scatter: [dataset, intermediate_projects]
+    scatterMethod: dotproduct
 
-    # Specify the cluster target for this job.  This means each
-    # separate scatter job will execute on the cluster that was
+    # Specify the cluster target for this task.  This means each
+    # separate scatter task will execute on the cluster that was
     # specified in the "cluster" field.
     #
     # Arvados handles streaming data between clusters, for example,
@@ -71,17 +69,17 @@ steps:
     # the federation.
     hints:
       arv:ClusterTarget:
-        cluster_id: $(inputs.shard.cluster)
-        project_uuid: $(inputs.shard.project)
+        cluster_id: $(inputs.dataset.cluster)
+        project_uuid: $(inputs.intermediate_projects)
 
     out: [out]
-    run: md5sum.cwl
+    run: extract.cwl
 
   # Collect the results of the distributed step and join them into a
   # single output file.  Arvados handles streaming inputs,
   # intermediate results, and outputs between clusters on demand.
   gather-results:
     in:
-      inp: distributed-analysis/out
-    out: [joined]
-    run: cat.cwl
+      dataset: distributed-analysis/out
+    out: [out]
+    run: merge.cwl
diff --git a/doc/user/cwl/federated/file-on-clsr1.dat b/doc/user/cwl/federated/file-on-clsr1.dat
deleted file mode 100644 (file)
index e79f152..0000000
+++ /dev/null
@@ -1 +0,0 @@
-file-on-clsr1.dat
diff --git a/doc/user/cwl/federated/file-on-clsr2.dat b/doc/user/cwl/federated/file-on-clsr2.dat
deleted file mode 100644 (file)
index 9179dc8..0000000
+++ /dev/null
@@ -1 +0,0 @@
-file-on-clsr2.dat
diff --git a/doc/user/cwl/federated/file-on-clsr3.dat b/doc/user/cwl/federated/file-on-clsr3.dat
deleted file mode 100644 (file)
index 58b5902..0000000
+++ /dev/null
@@ -1 +0,0 @@
-file-on-clsr3.dat
diff --git a/doc/user/cwl/federated/items1.csv b/doc/user/cwl/federated/items1.csv
new file mode 100644 (file)
index 0000000..59d2d32
--- /dev/null
@@ -0,0 +1,29 @@
+color,item
+blue,ball
+yellow,ball
+red,ball
+green,book
+purple,book
+red,book
+yellow,flower
+purple,flower
+red,bicycle
+red,ball
+green,picture
+yellow,ball
+purple,flower
+yellow,ball
+green,bicycle
+orange,book
+green,book
+orange,picture
+blue,book
+orange,car
+yellow,flower
+purple,ball
+blue,book
+orange,book
+orange,book
+yellow,book
+orange,car
+yellow,car
diff --git a/doc/user/cwl/federated/items2.csv b/doc/user/cwl/federated/items2.csv
new file mode 100644 (file)
index 0000000..566dab7
--- /dev/null
@@ -0,0 +1,33 @@
+color,item
+green,bicycle
+red,flower
+blue,bicycle
+yellow,flower
+green,ball
+red,book
+red,bicycle
+yellow,ball
+blue,picture
+green,book
+orange,flower
+blue,ball
+orange,car
+green,book
+yellow,car
+orange,picture
+orange,car
+yellow,flower
+green,ball
+orange,car
+purple,book
+green,ball
+red,flower
+blue,car
+orange,flower
+blue,book
+blue,bicycle
+red,picture
+orange,flower
+orange,book
+blue,flower
+orange,book
diff --git a/doc/user/cwl/federated/items3.csv b/doc/user/cwl/federated/items3.csv
new file mode 100644 (file)
index 0000000..e820e45
--- /dev/null
@@ -0,0 +1,41 @@
+color,item
+purple,book
+green,book
+red,bicycle
+yellow,book
+orange,book
+green,car
+green,car
+blue,ball
+yellow,bicycle
+orange,book
+green,bicycle
+blue,flower
+red,bicycle
+purple,bicycle
+green,bicycle
+orange,ball
+yellow,car
+orange,ball
+red,ball
+red,car
+green,picture
+green,flower
+blue,picture
+green,car
+yellow,flower
+purple,flower
+green,ball
+yellow,bicycle
+orange,bicycle
+orange,flower
+yellow,picture
+purple,flower
+green,picture
+orange,car
+orange,picture
+yellow,car
+yellow,picture
+purple,picture
+purple,picture
+purple,flower
diff --git a/doc/user/cwl/federated/md5sum.cwl b/doc/user/cwl/federated/md5sum.cwl
deleted file mode 100644 (file)
index 9c78dc2..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-cwlVersion: v1.0
-class: CommandLineTool
-$namespaces:
-  arv: "http://arvados.org/cwl#"
-requirements:
-  InlineJavascriptRequirement: {}
-inputs:
-  inp:
-    type: File
-outputs:
-  out:
-    type: File
-    outputBinding:
-      glob: out.txt
-stdin: $(inputs.inp.path)
-stdout: out.txt
-arguments: ["md5sum", "-"]
diff --git a/doc/user/cwl/federated/merge.cwl b/doc/user/cwl/federated/merge.cwl
new file mode 100644 (file)
index 0000000..a60d619
--- /dev/null
@@ -0,0 +1,23 @@
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+  SchemaDefRequirement:
+    types:
+      - $import: FileOnCluster.yml
+inputs:
+  dataset:
+    type: File[]
+    inputBinding:
+      position: 1
+  merge_py:
+    type: File
+    default:
+      class: File
+      location: merge.py
+outputs:
+  out:
+    type: File
+    outputBinding:
+      glob: merged.csv
+
+arguments: [python, $(inputs.merge_py)]
diff --git a/doc/user/cwl/federated/merge.py b/doc/user/cwl/federated/merge.py
new file mode 100644 (file)
index 0000000..03c79f2
--- /dev/null
@@ -0,0 +1,25 @@
+"""Merge CSV files that share a header row into "merged.csv".
+
+Usage: merge.py <file.csv> [<file.csv> ...]
+
+The header line of the first input is written once; the header line of
+every later input is dropped and its remaining lines are appended.
+"""
+import sys
+
+# Context managers close every file deterministically, so the merged
+# output is fully flushed before the script exits.
+with open("merged.csv", "wt") as merged:
+    wroteheader = False
+    for s in sys.argv[1:]:
+        with open(s, "rt") as f:
+            # An empty input has no header line; skip it instead of
+            # failing with StopIteration.
+            header = next(f, None)
+            if header is None:
+                continue
+            if not wroteheader:
+                merged.write(header)
+                wroteheader = True
+            for l in f:
+                merged.write(l)
index ed8a83ab3f6ba19527b4dbd926f0bb6e838aacc0..14e346248dc3828c357a9c451bc30a0d2461c626 100644 (file)
@@ -1,18 +1,25 @@
-shards:
+select_column: color
+select_values:
+  class: File
+  location: colors_to_select.txt
+
+datasets:
   - cluster: clsr1
-    project: clsr1-j7d0g-qxc4jcji7n4lafx
     file:
       class: File
-      location: keep:485df2c5cec3207a32f49c42f1cdcca9+61/file-on-clsr1.dat
+      location: keep:0dcf9310e5bf0c07270416d3a0cd6a43+56/items1.csv
 
   - cluster: clsr2
-    project: clsr2-j7d0g-ivdrm1hyym21vkq
     file:
       class: File
-      location: keep:ae6e9c3e9bfa52a0122ecb489d8198ff+61/file-on-clsr2.dat
+      location: keep:12707d325a3f4687674b858bd32beae9+56/items2.csv
 
   - cluster: clsr3
-    project: clsr3-j7d0g-e3njz2s53lyb0ka
     file:
       class: File
-      location: keep:0b43a0ef9ea592d5d7b299978dfa8643+61/file-on-clsr3.dat
+      location: keep:dbff6bb7fc43176527af5eb9dec28871+56/items3.csv
+
+intermediate_projects:
+  - clsr1-j7d0g-qxc4jcji7n4lafx
+  - clsr2-j7d0g-e7r20egb8hlgn53
+  - clsr3-j7d0g-vrl00zoku9spnen
index dd504398e3617902e4cc6455108a2974027e4d8a..68b6a87d09de0c0c325097b48a488aa424cd6a1c 100644 (file)
@@ -19,20 +19,20 @@ In Keep, information is stored in *data blocks*.  Data blocks are normally betwe
 
 In order to reassemble the file, Keep stores a *collection* data block which lists in sequence the data blocks that make up the original file.  A collection data block may store the information for multiple files, including a directory structure.
 
-In this example we will use @c1bad4b39ca5a924e481008009d94e32+210@, which we added to Keep in "how to upload data":{{ site.baseurl }}/user/tutorials/tutorial-keep.html.  First let us examine the contents of this collection using @arv keep get@:
+In this example we will use @c1bad4b39ca5a924e481008009d94e32+210@, which we added to Keep in "how to upload data":{{ site.baseurl }}/user/tutorials/tutorial-keep.html.  First let us examine the contents of this collection using @arv-get@:
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv keep get c1bad4b39ca5a924e481008009d94e32+210</span>
+<pre><code>~$ <span class="userinput">arv-get c1bad4b39ca5a924e481008009d94e32+210</span>
 . 204e43b8a1185621ca55a94839582e6f+67108864 b9677abbac956bd3e86b1deb28dfac03+67108864 fc15aff2a762b13f521baf042140acec+67108864 323d2a3ce20370c4ca1d3462a344f8fd+25885655 0:227212247:var-GS000016015-ASM.tsv.bz2
 </code></pre>
 </notextile>
 
-The command @arv keep get@ fetches the contents of the collection @c1bad4b39ca5a924e481008009d94e32+210@.  In this example, this collection includes a single file @var-GS000016015-ASM.tsv.bz2@ which is 227212247 bytes long, and is stored using four sequential data blocks, @204e43b8a1185621ca55a94839582e6f+67108864@, @b9677abbac956bd3e86b1deb28dfac03+67108864@, @fc15aff2a762b13f521baf042140acec+67108864@, and @323d2a3ce20370c4ca1d3462a344f8fd+25885655@.
+The command @arv-get@ fetches the contents of the collection @c1bad4b39ca5a924e481008009d94e32+210@.  In this example, this collection includes a single file @var-GS000016015-ASM.tsv.bz2@ which is 227212247 bytes long, and is stored using four sequential data blocks, @204e43b8a1185621ca55a94839582e6f+67108864@, @b9677abbac956bd3e86b1deb28dfac03+67108864@, @fc15aff2a762b13f521baf042140acec+67108864@, and @323d2a3ce20370c4ca1d3462a344f8fd+25885655@.
 
-Let's use @arv keep get@ to download the first data block:
+Let's use @arv-get@ to download the first data block:
 
 notextile. <pre><code>~$ <span class="userinput">cd /scratch/<b>you</b></span>
-/scratch/<b>you</b>$ <span class="userinput">arv keep get 204e43b8a1185621ca55a94839582e6f+67108864 &gt; block1</span></code></pre>
+/scratch/<b>you</b>$ <span class="userinput">arv-get 204e43b8a1185621ca55a94839582e6f+67108864 &gt; block1</span></code></pre>
 
 {% include 'notebox_begin' %}
 
@@ -40,7 +40,7 @@ When you run this command, you may get this API warning:
 
 notextile. <pre><code>WARNING:root:API lookup failed for collection 204e43b8a1185621ca55a94839582e6f+67108864 (&lt;class 'apiclient.errors.HttpError'&gt;: &lt;HttpError 404 when requesting https://qr1hi.arvadosapi.com/arvados/v1/collections/204e43b8a1185621ca55a94839582e6f%2B67108864?alt=json returned "Not Found"&gt;)</code></pre>
 
-This happens because @arv keep get@ tries to find a collection with this identifier.  When that fails, it emits this warning, then looks for a datablock instead, which succeeds.
+This happens because @arv-get@ tries to find a collection with this identifier.  When that fails, it emits this warning, then looks for a datablock instead, which succeeds.
 
 {% include 'notebox_end' %}
 
index 752488e157b7dbf5852d657a5a526f4693b65b0d..544ccbd35eec476b86478baa3594a9415bbf86d5 100644 (file)
@@ -18,7 +18,7 @@ h2. Installing GATK
 Download the GATK binary tarball[1] -- e.g., @GenomeAnalysisTK-2.6-4.tar.bz2@ -- and "copy it to your Arvados VM":{{site.baseurl}}/user/tutorials/tutorial-keep.html.
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv keep put GenomeAnalysisTK-2.6-4.tar.bz2</span>
+<pre><code>~$ <span class="userinput">arv-put GenomeAnalysisTK-2.6-4.tar.bz2</span>
 c905c8d8443a9c44274d98b7c6cfaa32+94
 </code></pre>
 </notextile>
index 34c452a900be2546c314444a41a54b61a92a25e1..f7a206010184e7faf77255f4346774dd3a4147b0 100644 (file)
@@ -149,10 +149,10 @@ Now you can list the files in the collection:
 </code></pre>
 </notextile>
 
-This collection consists of the @md5sum.txt@ file.  Use @arv keep get@ to show the contents of the @md5sum.txt@ file:
+This collection consists of the @md5sum.txt@ file.  Use @arv-get@ to show the contents of the @md5sum.txt@ file:
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv keep get dd755dbc8d49a67f4fe7dc843e4f10a6+54/md5sum.txt</span>
+<pre><code>~$ <span class="userinput">arv-get dd755dbc8d49a67f4fe7dc843e4f10a6+54/md5sum.txt</span>
 44b8ae3fde7a8a88d2f7ebd237625b4f ./var-GS000016015-ASM.tsv.bz2
 </code></pre>
 </notextile>
@@ -171,10 +171,10 @@ On the command line, the Keep identifier listed in the @"log"@ field from @arv j
 </code></pre>
 </notextile>
 
-The log collection consists of one log file named with the job's UUID.  You can access it using @arv keep get@:
+The log collection consists of one log file named with the job's UUID.  You can access it using @arv-get@:
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv keep get xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91/qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt</span>
+<pre><code>~$ <span class="userinput">arv-get xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx+91/qr1hi-8i9sb-xxxxxxxxxxxxxxx.log.txt</span>
 2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  check slurm allocation
 2013-12-16_20:44:35 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  node compute13 - 8 slots
 2013-12-16_20:44:36 qr1hi-8i9sb-xxxxxxxxxxxxxxx 7575  start
index 4ff402ea7fb2babb58489f03ce523fcd931ae68e..7d308dcff9c369e933f9edf4cc466b1cc64e04aa 100644 (file)
@@ -77,7 +77,7 @@ Because the job ran in concurrent, each instance of concurrent-hash creates a se
 <notextile>
 <pre><code>~/$USER/crunch_scripts$ <span class="userinput">arv keep ls e2ccd204bca37c77c0ba59fc470cd0f7+162</span>
 ./md5sum.txt
-~/$USER/crunch_scripts$ <span class="userinput">arv keep get e2ccd204bca37c77c0ba59fc470cd0f7+162/md5sum.txt</span>
+~/$USER/crunch_scripts$ <span class="userinput">arv-get e2ccd204bca37c77c0ba59fc470cd0f7+162/md5sum.txt</span>
 0f1d6bcf55c34bed7f92a805d2d89bbf alice.txt
 504938460ef369cd275e4ef58994cffe bob.txt
 8f3b36aff310e06f3c5b9e95678ff77a carol.txt
index 53cdfe4a08cb1ed07ca04ffe2e0182f9dbdcb471..ec7086db96d1fec397013eb24e53f0dd2681854b 100644 (file)
@@ -9,7 +9,7 @@ Copyright (C) The Arvados Authors. All rights reserved.
 SPDX-License-Identifier: CC-BY-SA-3.0
 {% endcomment %}
 
-Arvados Data collections can be uploaded using either the @arv keep put@ command line tool or using Workbench.
+Arvados Data collections can be uploaded using either the @arv-put@ command line tool or using Workbench.
 
 # "*Upload using command line tool*":#upload-using-command
 # "*Upload using Workbench*":#upload-using-workbench
@@ -20,9 +20,9 @@ h2(#upload-using-command). Upload using command line tool
 
 {% include 'tutorial_expectations' %}
 
-To upload a file to Keep using @arv keep put@:
+To upload a file to Keep using @arv-put@:
 <notextile>
-<pre><code>~$ <span class="userinput">arv keep put var-GS000016015-ASM.tsv.bz2</span>
+<pre><code>~$ <span class="userinput">arv-put var-GS000016015-ASM.tsv.bz2</span>
 216M / 216M 100.0%
 Collection saved as ...
 qr1hi-4zz18-xxxxxxxxxxxxxxx
@@ -34,27 +34,27 @@ The output value @qr1hi-4zz18-xxxxxxxxxxxxxxx@ is the uuid of the Arvados collec
 
 Note: The file used in this example is a freely available TSV file containing variant annotations from the "Personal Genome Project (PGP)":http://www.pgp-hms.org participant "hu599905":https://my.pgp-hms.org/profile/hu599905), downloadable "here":https://warehouse.pgp-hms.org/warehouse/f815ec01d5d2f11cb12874ab2ed50daa+234+K@ant/var-GS000016015-ASM.tsv.bz2. Alternatively, you can replace @var-GS000016015-ASM.tsv.bz2@ with the name of any file you have locally, or you could get the TSV file by "downloading it from Keep.":{{site.baseurl}}/user/tutorials/tutorial-keep-get.html
 
-<notextile><a name="dir"></a></notextile>It is also possible to upload an entire directory with @arv keep put@:
+<notextile><a name="dir"></a></notextile>It is also possible to upload an entire directory with @arv-put@:
 
 <notextile>
 <pre><code>~$ <span class="userinput">mkdir tmp</span>
 ~$ <span class="userinput">echo "hello alice" > tmp/alice.txt</span>
 ~$ <span class="userinput">echo "hello bob" > tmp/bob.txt</span>
 ~$ <span class="userinput">echo "hello carol" > tmp/carol.txt</span>
-~$ <span class="userinput">arv keep put tmp</span>
+~$ <span class="userinput">arv-put tmp</span>
 0M / 0M 100.0%
 Collection saved as ...
 qr1hi-4zz18-yyyyyyyyyyyyyyy
 </code></pre>
 </notextile>
 
-In both examples, the @arv keep put@ command created a collection. The first collection contains the single uploaded file. The second collection contains the entire uploaded directory.
+In both examples, the @arv-put@ command created a collection. The first collection contains the single uploaded file. The second collection contains the entire uploaded directory.
 
-@arv keep put@ accepts quite a few optional command line arguments, which are described on the "arv subcommands":{{site.baseurl}}/sdk/cli/subcommands.html#arv-keep-put page.
+@arv-put@ accepts quite a few optional command line arguments, which are described on the "arv subcommands":{{site.baseurl}}/sdk/cli/subcommands.html#arv-keep-put page.
 
 h3. Locate your collection in Workbench
 
+Visit the Workbench *Dashboard*.  Click on the *Projects*<span class="caret"></span> dropdown menu in the top navigation menu and select your *Home* project.  Your newly uploaded collection should appear near the top of the *Data collections* tab.  The collection name printed by @arv-put@ will appear under the *name* column.
+Visit the Workbench *Dashboard*.  Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project.  Your newly uploaded collection should appear near the top of the *Data collections* tab.  The collection name printed by @arv-put@ will appear under the *name* column.
 
 To move the collection to a different project, check the box at the left of the collection row.  Pull down the *Selection...*<span class="caret"></span> menu near the top of the page tab, and select *Move selected...* button. This will open a dialog box where you can select a destination project for the collection.  Click a project, then finally the <span class="btn btn-sm btn-primary">Move</span> button.
 
index 858bfc2b26a7e6deb90142c0f7d5904e18ef1474..a41e4b0331548f977d69b3ce993795c51e28ea1d 100644 (file)
@@ -13,13 +13,12 @@ import (
        "os"
        "os/exec"
 
-       "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/ghodss/yaml"
 )
 
-var DumpCommand cmd.Handler = dumpCommand{}
+var DumpCommand dumpCommand
 
 type dumpCommand struct{}
 
@@ -62,7 +61,7 @@ func (dumpCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou
        return 0
 }
 
-var CheckCommand cmd.Handler = checkCommand{}
+var CheckCommand checkCommand
 
 type checkCommand struct{}
 
index d77003b478112e6ee3960e65541bb9fe1dc0a083..f2915a03917260aa07fd5c656b5a1b9c7833757f 100644 (file)
@@ -7,11 +7,18 @@ package config
 import (
        "bytes"
 
+       "git.curoverse.com/arvados.git/lib/cmd"
        check "gopkg.in/check.v1"
 )
 
 var _ = check.Suite(&CommandSuite{})
 
+var (
+       // Commands must satisfy cmd.Handler interface
+       _ cmd.Handler = dumpCommand{}
+       _ cmd.Handler = checkCommand{}
+)
+
 type CommandSuite struct{}
 
 func (s *CommandSuite) TestBadArg(c *check.C) {
@@ -52,7 +59,7 @@ Clusters:
 `
        code := CheckCommand.RunCommand("arvados config-check", []string{"-config", "-"}, bytes.NewBufferString(in), &stdout, &stderr)
        c.Check(code, check.Equals, 1)
-       c.Check(stdout.String(), check.Matches, `(?ms).*API:\n\- +.*MaxItemsPerResponse: 1000\n\+ +MaxItemsPerResponse: 1234\n.*`)
+       c.Check(stdout.String(), check.Matches, `(?ms).*\n\- +.*MaxItemsPerResponse: 1000\n\+ +MaxItemsPerResponse: 1234\n.*`)
 }
 
 func (s *CommandSuite) TestCheckUnknownKey(c *check.C) {
index 94cd8fcbf65d2181c918818f7ba4779408b281a0..dc128e56b5aef01d90531317ee61c498b394aa92 100644 (file)
@@ -35,11 +35,13 @@ Clusters:
         InternalURLs: {}
         ExternalURL: ""
       GitSSH:
+        InternalURLs: {}
         ExternalURL: ""
       DispatchCloud:
         InternalURLs: {}
         ExternalURL: "-"
       SSO:
+        InternalURLs: {}
         ExternalURL: ""
       Keepproxy:
         InternalURLs: {}
@@ -54,13 +56,16 @@ Clusters:
         InternalURLs: {}
         ExternalURL: "-"
       Composer:
+        InternalURLs: {}
         ExternalURL: ""
       WebShell:
+        InternalURLs: {}
         ExternalURL: ""
       Workbench1:
         InternalURLs: {}
         ExternalURL: ""
       Workbench2:
+        InternalURLs: {}
         ExternalURL: ""
       Nodemanager:
         InternalURLs: {}
@@ -113,7 +118,7 @@ Clusters:
       # Interval (seconds) between asynchronous permission view updates. Any
       # permission-updating API called with the 'async' parameter schedules an
       # update on the permission view in the future, if not already scheduled.
-      AsyncPermissionsUpdateInterval: 20
+      AsyncPermissionsUpdateInterval: 20s
 
       # Maximum number of concurrent outgoing requests to make while
       # serving a single incoming multi-cluster (federated) request.
@@ -260,7 +265,7 @@ Clusters:
       # Interval (seconds) between trash sweeps. During a trash sweep,
       # collections are marked as trash if their trash_at time has
       # arrived, and deleted if their delete_at time has arrived.
-      TrashSweepInterval: 60
+      TrashSweepInterval: 60s
 
       # If true, enable collection versioning.
       # When a collection's preserve_version field is true or the current version
@@ -269,10 +274,10 @@ Clusters:
       # the current collection.
       CollectionVersioning: false
 
-      #   0 = auto-create a new version on every update.
-      #  -1 = never auto-create new versions.
-      # > 0 = auto-create a new version when older than the specified number of seconds.
-      PreserveVersionIfIdle: -1
+      #   0s = auto-create a new version on every update.
+      #  -1s = never auto-create new versions.
+      # > 0s = auto-create a new version when older than the specified duration.
+      PreserveVersionIfIdle: -1s
 
     Login:
       # These settings are provided by your OAuth2 provider (e.g.,
@@ -336,12 +341,6 @@ Clusters:
       # scheduling parameter set.
       UsePreemptibleInstances: false
 
-      # Include details about job reuse decisions in the server log. This
-      # causes additional database queries to run, so it should not be
-      # enabled unless you expect to examine the resulting logs for
-      # troubleshooting purposes.
-      LogReuseDecisions: false
-
       # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
       # (experimental) cloud dispatcher for executing containers on
       # worker VMs. Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
@@ -366,8 +365,8 @@ Clusters:
         LogBytesPerEvent: 4096
         LogSecondsBetweenEvents: 1
 
-        # The sample period for throttling logs, in seconds.
-        LogThrottlePeriod: 60
+        # The sample period for throttling logs.
+        LogThrottlePeriod: 60s
 
         # Maximum number of bytes that job can log over crunch_log_throttle_period
         # before being silenced until the end of the period.
@@ -381,18 +380,18 @@ Clusters:
         # silenced by throttling are not counted against this total.
         LimitLogBytesPerJob: 67108864
 
-        LogPartialLineThrottlePeriod: 5
+        LogPartialLineThrottlePeriod: 5s
 
-        # Container logs are written to Keep and saved in a collection,
-        # which is updated periodically while the container runs.  This
-        # value sets the interval (given in seconds) between collection
-        # updates.
-        LogUpdatePeriod: 1800
+        # Container logs are written to Keep and saved in a
+        # collection, which is updated periodically while the
+        # container runs.  This value sets the interval between
+        # collection updates.
+        LogUpdatePeriod: 30m
 
         # The log collection is also updated when the specified amount of
         # log data (given in bytes) is produced in less than one update
         # period.
-        LogUpdateSize: 33554432
+        LogUpdateSize: 32MiB
 
       SLURM:
         Managed:
@@ -528,7 +527,7 @@ Clusters:
         TimeoutShutdown: 10s
 
         # Worker VM image ID.
-        ImageID: ami-01234567890abcdef
+        ImageID: ""
 
         # Tags to add on all resources (VMs, NICs, disks) created by
         # the container dispatcher. (Arvados's own tags --
@@ -613,8 +612,70 @@ Clusters:
         Insecure: false
         ActivateUsers: false
       SAMPLE:
+        # API endpoint host or host:port; default is {id}.arvadosapi.com
         Host: sample.arvadosapi.com
+
+        # Perform a proxy request when a local client requests an
+        # object belonging to this remote.
         Proxy: false
+
+        # Default "https". Can be set to "http" for testing.
         Scheme: https
+
+        # Disable TLS verify. Can be set to true for testing.
         Insecure: false
+
+        # When users present tokens issued by this remote cluster, and
+        # their accounts are active on the remote cluster, activate
+        # them on this cluster too.
         ActivateUsers: false
+
+    Workbench:
+      # Workbench1 configs
+      Theme: default
+      ActivationContactLink: mailto:info@arvados.org
+      ArvadosDocsite: https://doc.arvados.org
+      ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
+      ShowUserAgreementInline: false
+      SecretToken: ""
+      SecretKeyBase: ""
+      RepositoryCache: /var/www/arvados-workbench/current/tmp/git
+      UserProfileFormFields:
+        SAMPLE:
+          Type: text
+          FormFieldTitle: ""
+          FormFieldDescription: ""
+          Required: true
+      UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'
+      ApplicationMimetypesWithViewIcon:
+        cwl: {}
+        fasta: {}
+        go: {}
+        javascript: {}
+        json: {}
+        pdf: {}
+        python: {}
+        x-python: {}
+        r: {}
+        rtf: {}
+        sam: {}
+        x-sh: {}
+        vnd.realvnc.bed: {}
+        xml: {}
+        xsl: {}
+      LogViewerMaxBytes: 1M
+      EnablePublicProjectsPage: true
+      EnableGettingStartedPopup: false
+      APIResponseCompression: true
+      APIClientConnectTimeout: 2m
+      APIClientReceiveTimeout: 5m
+      RunningJobLogRecordsToFetch: 2000
+      ShowRecentCollectionsOnDashboard: true
+      ShowUserNotifications: true
+      MultiSiteSearch: false
+      Repositories: true
+      SiteName: Arvados Workbench
+
+      # Workbench2 configs
+      VocabularyURL: ""
+      FileViewersConfigURL: ""
diff --git a/lib/config/export.go b/lib/config/export.go
new file mode 100644 (file)
index 0000000..39344c0
--- /dev/null
@@ -0,0 +1,155 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package config
+
+import (
+       "encoding/json"
+       "errors"
+       "fmt"
+       "io"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// ExportJSON writes a JSON object with the safe (non-secret) portions
+// of the cluster config to w.
+func ExportJSON(w io.Writer, cluster *arvados.Cluster) error {
+       buf, err := json.Marshal(cluster)
+       if err != nil {
+               return err
+       }
+       var m map[string]interface{}
+       err = json.Unmarshal(buf, &m)
+       if err != nil {
+               return err
+       }
+       err = redactUnsafe(m, "", "")
+       if err != nil {
+               return err
+       }
+       return json.NewEncoder(w).Encode(m)
+}
+
+// whitelist classifies configs as safe/unsafe to reveal to
+// unauthenticated clients.
+//
+// Every config entry must either be listed explicitly here along with
+// all of its parent keys (e.g., "API" + "API.RequestTimeout"), or
+// have an ancestor listed as false (e.g.,
+// "PostgreSQL.Connection.password" has an ancestor
+// "PostgreSQL.Connection" with a false value). Otherwise, it is a bug
+// which should be caught by tests.
+//
+// Example: API.RequestTimeout is safe because whitelist["API"] == true
+// and whitelist["API.RequestTimeout"] == true.
+//
+// Example: PostgreSQL.Connection.password is not safe because
+// its ancestor whitelist["PostgreSQL"] == false.
+//
+// Example: API.BadKey would cause an error because
+// whitelist["API"] isn't false, and neither
+// whitelist["API.BadKey"] nor whitelist["API.*"]
+// exists.
+var whitelist = map[string]bool{
+       // | sort -t'"' -k2,2
+       "API":                                        true,
+       "API.AsyncPermissionsUpdateInterval":         false,
+       "API.DisabledAPIs":                           false,
+       "API.MaxIndexDatabaseRead":                   false,
+       "API.MaxItemsPerResponse":                    true,
+       "API.MaxRequestAmplification":                false,
+       "API.MaxRequestSize":                         true,
+       "API.RailsSessionSecretToken":                false,
+       "API.RequestTimeout":                         true,
+       "AuditLogs":                                  false,
+       "AuditLogs.MaxAge":                           false,
+       "AuditLogs.MaxDeleteBatch":                   false,
+       "AuditLogs.UnloggedAttributes":               false,
+       "Collections":                                true,
+       "Collections.BlobSigning":                    true,
+       "Collections.BlobSigningKey":                 false,
+       "Collections.BlobSigningTTL":                 true,
+       "Collections.CollectionVersioning":           false,
+       "Collections.DefaultReplication":             true,
+       "Collections.DefaultTrashLifetime":           true,
+       "Collections.PreserveVersionIfIdle":          true,
+       "Collections.TrashSweepInterval":             false,
+       "Containers":                                 true,
+       "Containers.CloudVMs":                        false,
+       "Containers.DefaultKeepCacheRAM":             true,
+       "Containers.DispatchPrivateKey":              false,
+       "Containers.JobsAPI":                         true,
+       "Containers.JobsAPI.CrunchJobUser":           false,
+       "Containers.JobsAPI.CrunchJobWrapper":        false,
+       "Containers.JobsAPI.CrunchRefreshTrigger":    false,
+       "Containers.JobsAPI.DefaultDockerImage":      false,
+       "Containers.JobsAPI.Enable":                  true,
+       "Containers.JobsAPI.GitInternalDir":          false,
+       "Containers.JobsAPI.ReuseJobIfOutputsDiffer": false,
+       "Containers.Logging":                         false,
+       "Containers.LogReuseDecisions":               false,
+       "Containers.MaxComputeVMs":                   false,
+       "Containers.MaxDispatchAttempts":             false,
+       "Containers.MaxRetryAttempts":                true,
+       "Containers.SLURM":                           false,
+       "Containers.StaleLockTimeout":                false,
+       "Containers.SupportedDockerImageFormats":     true,
+       "Containers.UsePreemptibleInstances":         true,
+       "Git":                                        false,
+       "InstanceTypes":                              true,
+       "InstanceTypes.*":                            true,
+       "InstanceTypes.*.*":                          true,
+       "Login":                                      false,
+       "Mail":                                       false,
+       "ManagementToken":                            false,
+       "PostgreSQL":                                 false,
+       "RemoteClusters":                             true,
+       "RemoteClusters.*":                           true,
+       "RemoteClusters.*.ActivateUsers":             true,
+       "RemoteClusters.*.Host":                      true,
+       "RemoteClusters.*.Insecure":                  true,
+       "RemoteClusters.*.Proxy":                     true,
+       "RemoteClusters.*.Scheme":                    true,
+       "Services":                                   true,
+       "Services.*":                                 true,
+       "Services.*.ExternalURL":                     true,
+       "Services.*.InternalURLs":                    false,
+       "SystemLogs":                                 false,
+       "SystemRootToken":                            false,
+       "TLS":                                        false,
+       "Users":                                      false,
+       "Workbench":                                  false,
+}
+
+func redactUnsafe(m map[string]interface{}, mPrefix, lookupPrefix string) error {
+       var errs []string
+       for k, v := range m {
+               lookupKey := k
+               safe, ok := whitelist[lookupPrefix+k]
+               if !ok {
+                       lookupKey = "*"
+                       safe, ok = whitelist[lookupPrefix+"*"]
+               }
+               if !ok {
+                       errs = append(errs, fmt.Sprintf("config bug: key %q not in whitelist map", lookupPrefix+k))
+                       continue
+               }
+               if !safe {
+                       delete(m, k)
+                       continue
+               }
+               if v, ok := v.(map[string]interface{}); ok {
+                       err := redactUnsafe(v, mPrefix+k+".", lookupPrefix+lookupKey+".")
+                       if err != nil {
+                               errs = append(errs, err.Error())
+                       }
+               }
+       }
+       if len(errs) > 0 {
+               return errors.New(strings.Join(errs, "\n"))
+       }
+       return nil
+}
diff --git a/lib/config/export_test.go b/lib/config/export_test.go
new file mode 100644 (file)
index 0000000..581e54c
--- /dev/null
@@ -0,0 +1,37 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package config
+
+import (
+       "bytes"
+       "regexp"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&ExportSuite{})
+
+type ExportSuite struct{}
+
+func (s *ExportSuite) TestExport(c *check.C) {
+       confdata := bytes.Replace(DefaultYAML, []byte("SAMPLE"), []byte("testkey"), -1)
+       cfg, err := Load(bytes.NewBuffer(confdata), ctxlog.TestLogger(c))
+       c.Assert(err, check.IsNil)
+       cluster := cfg.Clusters["xxxxx"]
+       cluster.ManagementToken = "abcdefg"
+
+       var exported bytes.Buffer
+       err = ExportJSON(&exported, &cluster)
+       c.Check(err, check.IsNil)
+       if err != nil {
+               c.Logf("If all the new keys are safe, add these to whitelist in export.go:")
+               for _, k := range regexp.MustCompile(`"[^"]*"`).FindAllString(err.Error(), -1) {
+                       c.Logf("\t%q: true,", strings.Replace(k, `"`, "", -1))
+               }
+       }
+       c.Check(exported.String(), check.Not(check.Matches), `(?ms).*abcdefg.*`)
+}
index 3492615e9959f48f6e3134f733194e93af043e93..98cd343bd1698980901cd3ec55461bd3e4953755 100644 (file)
@@ -41,11 +41,13 @@ Clusters:
         InternalURLs: {}
         ExternalURL: ""
       GitSSH:
+        InternalURLs: {}
         ExternalURL: ""
       DispatchCloud:
         InternalURLs: {}
         ExternalURL: "-"
       SSO:
+        InternalURLs: {}
         ExternalURL: ""
       Keepproxy:
         InternalURLs: {}
@@ -60,13 +62,16 @@ Clusters:
         InternalURLs: {}
         ExternalURL: "-"
       Composer:
+        InternalURLs: {}
         ExternalURL: ""
       WebShell:
+        InternalURLs: {}
         ExternalURL: ""
       Workbench1:
         InternalURLs: {}
         ExternalURL: ""
       Workbench2:
+        InternalURLs: {}
         ExternalURL: ""
       Nodemanager:
         InternalURLs: {}
@@ -119,7 +124,7 @@ Clusters:
       # Interval (seconds) between asynchronous permission view updates. Any
       # permission-updating API called with the 'async' parameter schedules an
       # update on the permission view in the future, if not already scheduled.
-      AsyncPermissionsUpdateInterval: 20
+      AsyncPermissionsUpdateInterval: 20s
 
       # Maximum number of concurrent outgoing requests to make while
       # serving a single incoming multi-cluster (federated) request.
@@ -266,7 +271,7 @@ Clusters:
       # Interval (seconds) between trash sweeps. During a trash sweep,
       # collections are marked as trash if their trash_at time has
       # arrived, and deleted if their delete_at time has arrived.
-      TrashSweepInterval: 60
+      TrashSweepInterval: 60s
 
       # If true, enable collection versioning.
       # When a collection's preserve_version field is true or the current version
@@ -275,10 +280,10 @@ Clusters:
       # the current collection.
       CollectionVersioning: false
 
-      #   0 = auto-create a new version on every update.
-      #  -1 = never auto-create new versions.
-      # > 0 = auto-create a new version when older than the specified number of seconds.
-      PreserveVersionIfIdle: -1
+      #   0s = auto-create a new version on every update.
+      #  -1s = never auto-create new versions.
+      # > 0s = auto-create a new version when older than the specified duration.
+      PreserveVersionIfIdle: -1s
 
     Login:
       # These settings are provided by your OAuth2 provider (e.g.,
@@ -342,12 +347,6 @@ Clusters:
       # scheduling parameter set.
       UsePreemptibleInstances: false
 
-      # Include details about job reuse decisions in the server log. This
-      # causes additional database queries to run, so it should not be
-      # enabled unless you expect to examine the resulting logs for
-      # troubleshooting purposes.
-      LogReuseDecisions: false
-
       # PEM encoded SSH key (RSA, DSA, or ECDSA) used by the
       # (experimental) cloud dispatcher for executing containers on
       # worker VMs. Begins with "-----BEGIN RSA PRIVATE KEY-----\n"
@@ -372,8 +371,8 @@ Clusters:
         LogBytesPerEvent: 4096
         LogSecondsBetweenEvents: 1
 
-        # The sample period for throttling logs, in seconds.
-        LogThrottlePeriod: 60
+        # The sample period for throttling logs.
+        LogThrottlePeriod: 60s
 
         # Maximum number of bytes that job can log over crunch_log_throttle_period
         # before being silenced until the end of the period.
@@ -387,18 +386,18 @@ Clusters:
         # silenced by throttling are not counted against this total.
         LimitLogBytesPerJob: 67108864
 
-        LogPartialLineThrottlePeriod: 5
+        LogPartialLineThrottlePeriod: 5s
 
-        # Container logs are written to Keep and saved in a collection,
-        # which is updated periodically while the container runs.  This
-        # value sets the interval (given in seconds) between collection
-        # updates.
-        LogUpdatePeriod: 1800
+        # Container logs are written to Keep and saved in a
+        # collection, which is updated periodically while the
+        # container runs.  This value sets the interval between
+        # collection updates.
+        LogUpdatePeriod: 30m
 
         # The log collection is also updated when the specified amount of
         # log data (given in bytes) is produced in less than one update
         # period.
-        LogUpdateSize: 33554432
+        LogUpdateSize: 32MiB
 
       SLURM:
         Managed:
@@ -534,7 +533,7 @@ Clusters:
         TimeoutShutdown: 10s
 
         # Worker VM image ID.
-        ImageID: ami-01234567890abcdef
+        ImageID: ""
 
         # Tags to add on all resources (VMs, NICs, disks) created by
         # the container dispatcher. (Arvados's own tags --
@@ -619,9 +618,71 @@ Clusters:
         Insecure: false
         ActivateUsers: false
       SAMPLE:
+        # API endpoint host or host:port; default is {id}.arvadosapi.com
         Host: sample.arvadosapi.com
+
+        # Perform a proxy request when a local client requests an
+        # object belonging to this remote.
         Proxy: false
+
+        # Default "https". Can be set to "http" for testing.
         Scheme: https
+
+        # Disable TLS verify. Can be set to true for testing.
         Insecure: false
+
+        # When users present tokens issued by this remote cluster, and
+        # their accounts are active on the remote cluster, activate
+        # them on this cluster too.
         ActivateUsers: false
+
+    Workbench:
+      # Workbench1 configs
+      Theme: default
+      ActivationContactLink: mailto:info@arvados.org
+      ArvadosDocsite: https://doc.arvados.org
+      ArvadosPublicDataDocURL: https://playground.arvados.org/projects/public
+      ShowUserAgreementInline: false
+      SecretToken: ""
+      SecretKeyBase: ""
+      RepositoryCache: /var/www/arvados-workbench/current/tmp/git
+      UserProfileFormFields:
+        SAMPLE:
+          Type: text
+          FormFieldTitle: ""
+          FormFieldDescription: ""
+          Required: true
+      UserProfileFormMessage: 'Welcome to Arvados. All <span style="color:red">required fields</span> must be completed before you can proceed.'
+      ApplicationMimetypesWithViewIcon:
+        cwl: {}
+        fasta: {}
+        go: {}
+        javascript: {}
+        json: {}
+        pdf: {}
+        python: {}
+        x-python: {}
+        r: {}
+        rtf: {}
+        sam: {}
+        x-sh: {}
+        vnd.realvnc.bed: {}
+        xml: {}
+        xsl: {}
+      LogViewerMaxBytes: 1M
+      EnablePublicProjectsPage: true
+      EnableGettingStartedPopup: false
+      APIResponseCompression: true
+      APIClientConnectTimeout: 2m
+      APIClientReceiveTimeout: 5m
+      RunningJobLogRecordsToFetch: 2000
+      ShowRecentCollectionsOnDashboard: true
+      ShowUserNotifications: true
+      MultiSiteSearch: false
+      Repositories: true
+      SiteName: Arvados Workbench
+
+      # Workbench2 configs
+      VocabularyURL: ""
+      FileViewersConfigURL: ""
 `)
index 6ce81bb5f9826b2374d9b7e77de9f1468e9e28c5..6b014476b6d9f31d6cf23197bcde6fac2fda0bc3 100644 (file)
@@ -97,6 +97,24 @@ Clusters:
        c.Check(logs, check.HasLen, 2)
 }
 
+func (s *LoadSuite) TestNoUnrecognizedKeysInDefaultConfig(c *check.C) {
+       var logbuf bytes.Buffer
+       logger := logrus.New()
+       logger.Out = &logbuf
+       var supplied map[string]interface{}
+       yaml.Unmarshal(DefaultYAML, &supplied)
+       cfg, err := Load(bytes.NewBuffer(DefaultYAML), logger)
+       c.Assert(err, check.IsNil)
+       var loaded map[string]interface{}
+       buf, err := yaml.Marshal(cfg)
+       c.Assert(err, check.IsNil)
+       err = yaml.Unmarshal(buf, &loaded)
+       c.Assert(err, check.IsNil)
+
+       logExtraKeys(logger, loaded, supplied, "")
+       c.Check(logbuf.String(), check.Equals, "")
+}
+
 func (s *LoadSuite) TestNoWarningsForDumpedConfig(c *check.C) {
        var logbuf bytes.Buffer
        logger := logrus.New()
index 1c859cfc515d142a0289610e402e725e07bfebb1..7d8e7a4334ae98bd727ed62725d1acea09746ae9 100644 (file)
@@ -57,12 +57,10 @@ func (s *FederationSuite) SetUpTest(c *check.C) {
        cluster := &arvados.Cluster{
                ClusterID:  "zhome",
                PostgreSQL: integrationTestCluster().PostgreSQL,
-               TLS:        arvados.TLS{Insecure: true},
-               API: arvados.API{
-                       MaxItemsPerResponse:     1000,
-                       MaxRequestAmplification: 4,
-               },
        }
+       cluster.TLS.Insecure = true
+       cluster.API.MaxItemsPerResponse = 1000
+       cluster.API.MaxRequestAmplification = 4
        arvadostest.SetServiceURL(&cluster.Services.RailsAPI, "http://localhost:1/")
        arvadostest.SetServiceURL(&cluster.Services.Controller, "http://localhost:/")
        s.testHandler = &Handler{Cluster: cluster}
index 2c3ce1d4f28d189e956cd3e120b8433214861619..12faacdd4398211f8466a4ed7e971283190b9871 100644 (file)
@@ -5,16 +5,19 @@
 package controller
 
 import (
+       "bytes"
        "context"
        "database/sql"
        "errors"
        "fmt"
+       "io"
        "net/http"
        "net/url"
        "strings"
        "sync"
        "time"
 
+       "git.curoverse.com/arvados.git/lib/config"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/health"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
@@ -73,6 +76,18 @@ func (h *Handler) setup() {
                Prefix: "/_health/",
                Routes: health.Routes{"ping": func() error { _, err := h.db(&http.Request{}); return err }},
        })
+
+       mux.Handle("/arvados/v1/config", http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+               var buf bytes.Buffer
+               err := config.ExportJSON(&buf, h.Cluster)
+               if err != nil {
+                       httpserver.Error(w, err.Error(), http.StatusInternalServerError)
+                       return
+               }
+               w.Header().Set("Content-Type", "application/json")
+               io.Copy(w, &buf)
+       }))
+
        hs := http.NotFoundHandler()
        hs = prepend(hs, h.proxyRailsAPI)
        hs = h.setupProxyRemoteCluster(hs)
index a1efaacddff5b2b7c52ad8fd78eb79c0500b2be8..9b0ff2764be620bd847dc03c2da2f0848b008f07 100644 (file)
@@ -42,8 +42,8 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
        s.cluster = &arvados.Cluster{
                ClusterID:  "zzzzz",
                PostgreSQL: integrationTestCluster().PostgreSQL,
-               TLS:        arvados.TLS{Insecure: true},
        }
+       s.cluster.TLS.Insecure = true
        arvadostest.SetServiceURL(&s.cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
        arvadostest.SetServiceURL(&s.cluster.Services.Controller, "http://localhost:/")
        s.handler = newHandler(s.ctx, s.cluster, "")
@@ -53,6 +53,25 @@ func (s *HandlerSuite) TearDownTest(c *check.C) {
        s.cancel()
 }
 
+func (s *HandlerSuite) TestConfigExport(c *check.C) {
+       s.cluster.ManagementToken = "secret"
+       s.cluster.SystemRootToken = "secret"
+       s.cluster.Collections.BlobSigning = true
+       s.cluster.Collections.BlobSigningTTL = arvados.Duration(23 * time.Second)
+       req := httptest.NewRequest("GET", "/arvados/v1/config", nil)
+       resp := httptest.NewRecorder()
+       s.handler.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       var cluster arvados.Cluster
+       c.Log(resp.Body.String())
+       err := json.Unmarshal(resp.Body.Bytes(), &cluster)
+       c.Check(err, check.IsNil)
+       c.Check(cluster.ManagementToken, check.Equals, "")
+       c.Check(cluster.SystemRootToken, check.Equals, "")
+       c.Check(cluster.Collections.BlobSigning, check.DeepEquals, true)
+       c.Check(cluster.Collections.BlobSigningTTL, check.Equals, arvados.Duration(23*time.Second))
+}
+
 func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
        req := httptest.NewRequest("GET", "/discovery/v1/apis/arvados/v1/rest", nil)
        resp := httptest.NewRecorder()
index a398af97b21884ae896f675b1c2ab00a59ae55d4..ae7f138b1b6862ab43022ed91b0fbdd360b3dc36 100644 (file)
@@ -36,8 +36,8 @@ func newServerFromIntegrationTestEnv(c *check.C) *httpserver.Server {
        handler := &Handler{Cluster: &arvados.Cluster{
                ClusterID:  "zzzzz",
                PostgreSQL: integrationTestCluster().PostgreSQL,
-               TLS:        arvados.TLS{Insecure: true},
        }}
+       handler.Cluster.TLS.Insecure = true
        arvadostest.SetServiceURL(&handler.Cluster.Services.RailsAPI, "https://"+os.Getenv("ARVADOS_TEST_API_HOST"))
        arvadostest.SetServiceURL(&handler.Cluster.Services.Controller, "http://localhost:/")
 
index bc699d92804092d8dbbc37bdcd3d8180b67e70c1..12c60ecb11177871a4b5230adddd0e313898270c 100644 (file)
@@ -132,12 +132,12 @@ func (disp *dispatcher) initialize() {
                disp.sshKey = key
        }
 
-       instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID, disp.logger)
+       disp.reg = prometheus.NewRegistry()
+       instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID, disp.logger, disp.reg)
        if err != nil {
                disp.logger.Fatalf("error initializing driver: %s", err)
        }
        disp.instanceSet = instanceSet
-       disp.reg = prometheus.NewRegistry()
        disp.pool = worker.NewPool(disp.logger, disp.ArvClient, disp.reg, disp.InstanceSetID, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
        disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, disp.ArvClient)
 
index 012621f12f633fe9c352e2f6bb847dadb965a59d..6b73e71ccd4267c5e9e6f13749499d8feb3f1a6f 100644 (file)
@@ -49,6 +49,7 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
        }
 
        s.cluster = &arvados.Cluster{
+               ManagementToken: "test-management-token",
                Containers: arvados.ContainersConfig{
                        DispatchPrivateKey: string(dispatchprivraw),
                        StaleLockTimeout:   arvados.Duration(5 * time.Millisecond),
@@ -193,6 +194,18 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                        c.Fatalf("timed out with %d containers (%v), %d instances (%+v)", len(ents), ents, len(insts), insts)
                }
        }
+
+       req := httptest.NewRequest("GET", "/metrics", nil)
+       req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken)
+       resp := httptest.NewRecorder()
+       s.disp.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="0",operation="Create"} [^0].*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="0",operation="List"} [^0].*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="1",operation="Create"} [^0].*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*driver_operations{error="1",operation="List"} 0\n.*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*instances_disappeared{state="shutdown"} [^0].*`)
+       c.Check(resp.Body.String(), check.Matches, `(?ms).*instances_disappeared{state="unknown"} 0\n.*`)
 }
 
 func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
index b67b5d054b57d172b940255a8318b76dd21af3b8..a8f3d5b5edce06411d7001809caf05ab51e49846 100644 (file)
@@ -12,6 +12,7 @@ import (
        "git.curoverse.com/arvados.git/lib/cloud/azure"
        "git.curoverse.com/arvados.git/lib/cloud/ec2"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
 )
@@ -21,13 +22,14 @@ var drivers = map[string]cloud.Driver{
        "ec2":   ec2.Driver,
 }
 
-func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
+func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger, reg *prometheus.Registry) (cloud.InstanceSet, error) {
        driver, ok := drivers[cluster.Containers.CloudVMs.Driver]
        if !ok {
                return nil, fmt.Errorf("unsupported cloud driver %q", cluster.Containers.CloudVMs.Driver)
        }
        sharedResourceTags := cloud.SharedResourceTags(cluster.Containers.CloudVMs.ResourceTags)
        is, err := driver.InstanceSet(cluster.Containers.CloudVMs.DriverParameters, setID, sharedResourceTags, logger)
+       is = newInstrumentedInstanceSet(is, reg)
        if maxops := cluster.Containers.CloudVMs.MaxCloudOpsPerSecond; maxops > 0 {
                is = rateLimitedInstanceSet{
                        InstanceSet: is,
@@ -113,3 +115,65 @@ nextInstance:
        }).WithError(err).Debugf("filteringInstanceSet returning instances")
        return returning, err
 }
+
+func newInstrumentedInstanceSet(is cloud.InstanceSet, reg *prometheus.Registry) cloud.InstanceSet {
+       cv := prometheus.NewCounterVec(prometheus.CounterOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "driver_operations",
+               Help:      "Number of instance-create/destroy/list operations performed via cloud driver.",
+       }, []string{"operation", "error"})
+
+       // Create all counters, so they are reported with zero values
+       // (instead of being missing) until they are incremented.
+       for _, op := range []string{"Create", "List", "Destroy", "SetTags"} {
+               for _, error := range []string{"0", "1"} {
+                       cv.WithLabelValues(op, error).Add(0)
+               }
+       }
+
+       reg.MustRegister(cv)
+       return instrumentedInstanceSet{is, cv}
+}
+
+type instrumentedInstanceSet struct {
+       cloud.InstanceSet // embedded, so methods not overridden on instrumentedInstanceSet are promoted unchanged
+       cv *prometheus.CounterVec // incremented by Create and Instances with (operation name, boolLabelValue(err != nil)) label values
+}
+
+func (is instrumentedInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, init cloud.InitCommand, pk ssh.PublicKey) (cloud.Instance, error) {
+       inst, err := is.InstanceSet.Create(it, image, tags, init, pk)
+       is.cv.WithLabelValues("Create", boolLabelValue(err != nil)).Inc()
+       return instrumentedInstance{inst, is.cv}, err
+}
+
+func (is instrumentedInstanceSet) Instances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
+       instances, err := is.InstanceSet.Instances(tags)
+       is.cv.WithLabelValues("List", boolLabelValue(err != nil)).Inc()
+       return instances, err
+}
+
+type instrumentedInstance struct {
+       cloud.Instance // embedded, so methods other than Destroy and SetTags are promoted unchanged
+       cv *prometheus.CounterVec // incremented by Destroy and SetTags with (operation name, boolLabelValue(err != nil)) label values
+}
+
+func (inst instrumentedInstance) Destroy() error {
+       err := inst.Instance.Destroy()
+       inst.cv.WithLabelValues("Destroy", boolLabelValue(err != nil)).Inc()
+       return err
+}
+
+func (inst instrumentedInstance) SetTags(tags cloud.InstanceTags) error {
+       err := inst.Instance.SetTags(tags)
+       inst.cv.WithLabelValues("SetTags", boolLabelValue(err != nil)).Inc()
+       return err
+}
+
+func boolLabelValue(v bool) string {
+       if v {
+               return "1"
+       } else {
+               return "0"
+       }
+}
index 0ee36a96ff1d23d3c27e48679dba4b31007299f4..201e8aad276eb5f8ca353e3ff8c41fc9dee00f2a 100644 (file)
@@ -169,6 +169,7 @@ type Pool struct {
        mInstancesPrice    *prometheus.GaugeVec
        mVCPUs             *prometheus.GaugeVec
        mMemory            *prometheus.GaugeVec
+       mDisappearances    *prometheus.CounterVec
 }
 
 type createCall struct {
@@ -556,6 +557,16 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
                Help:      "Total memory on all cloud VMs.",
        }, []string{"category"})
        reg.MustRegister(wp.mMemory)
+       wp.mDisappearances = prometheus.NewCounterVec(prometheus.CounterOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "instances_disappeared",
+               Help:      "Number of occurrences of an instance disappearing from the cloud provider's list of instances.",
+       }, []string{"state"})
+       for _, v := range stateString {
+               wp.mDisappearances.WithLabelValues(v).Add(0)
+       }
+       reg.MustRegister(wp.mDisappearances)
 }
 
 func (wp *Pool) runMetrics() {
@@ -778,6 +789,9 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
                        "WorkerState": wkr.state,
                })
                logger.Info("instance disappeared in cloud")
+               if wp.mDisappearances != nil {
+                       wp.mDisappearances.WithLabelValues(stateString[wkr.state]).Inc()
+               }
                delete(wp.workers, id)
                go wkr.Close()
                notify = true
index d96bf25173a949dc0d95cb49f9ba639295c019b4..adee06723027916178b9270adcabb4b0386d3bc2 100644 (file)
@@ -50,12 +50,6 @@ func (sc *Config) GetCluster(clusterID string) (*Cluster, error) {
        }
 }
 
-type API struct {
-       MaxItemsPerResponse     int
-       MaxRequestAmplification int
-       RequestTimeout          Duration
-}
-
 type Cluster struct {
        ClusterID       string `json:"-"`
        ManagementToken string
@@ -65,28 +59,130 @@ type Cluster struct {
        Containers      ContainersConfig
        RemoteClusters  map[string]RemoteCluster
        PostgreSQL      PostgreSQL
-       API             API
-       SystemLogs      SystemLogs
-       TLS             TLS
+
+       API struct {
+               AsyncPermissionsUpdateInterval Duration
+               DisabledAPIs                   []string
+               MaxIndexDatabaseRead           int
+               MaxItemsPerResponse            int
+               MaxRequestAmplification        int
+               MaxRequestSize                 int
+               RailsSessionSecretToken        string
+               RequestTimeout                 Duration
+       }
+       AuditLogs struct {
+               MaxAge             Duration
+               MaxDeleteBatch     int
+               UnloggedAttributes []string
+       }
+       Collections struct {
+               BlobSigning           bool
+               BlobSigningKey        string
+               DefaultReplication    int
+               BlobSigningTTL        Duration
+               DefaultTrashLifetime  Duration
+               TrashSweepInterval    Duration
+               CollectionVersioning  bool
+               PreserveVersionIfIdle Duration
+       }
+       Git struct {
+               Repositories string
+       }
+       Login struct {
+               ProviderAppSecret string
+               ProviderAppID     string
+       }
+       Mail struct {
+               MailchimpAPIKey                string
+               MailchimpListID                string
+               SendUserSetupNotificationEmail string
+               IssueReporterEmailFrom         string
+               IssueReporterEmailTo           string
+               SupportEmailAddress            string
+               EmailFrom                      string
+       }
+       SystemLogs struct {
+               LogLevel                string
+               Format                  string
+               MaxRequestLogParamsSize int
+       }
+       TLS struct {
+               Certificate string
+               Key         string
+               Insecure    bool
+       }
+       Users struct {
+               AdminNotifierEmailFrom                string
+               AutoAdminFirstUser                    bool
+               AutoAdminUserWithEmail                string
+               AutoSetupNewUsers                     bool
+               AutoSetupNewUsersWithRepository       bool
+               AutoSetupNewUsersWithVmUUID           string
+               AutoSetupUsernameBlacklist            []string
+               EmailSubjectPrefix                    string
+               NewInactiveUserNotificationRecipients []string
+               NewUserNotificationRecipients         []string
+               NewUsersAreActive                     bool
+               UserNotifierEmailFrom                 string
+               UserProfileNotificationAddress        string
+       }
+       Workbench struct {
+               ActivationContactLink            string
+               APIClientConnectTimeout          Duration
+               APIClientReceiveTimeout          Duration
+               APIResponseCompression           bool
+               ApplicationMimetypesWithViewIcon map[string]struct{}
+               ArvadosDocsite                   string
+               ArvadosPublicDataDocURL          string
+               EnableGettingStartedPopup        bool
+               EnablePublicProjectsPage         bool
+               FileViewersConfigURL             string
+               LogViewerMaxBytes                ByteSize
+               MultiSiteSearch                  bool
+               Repositories                     bool
+               RepositoryCache                  string
+               RunningJobLogRecordsToFetch      int
+               SecretKeyBase                    string
+               SecretToken                      string
+               ShowRecentCollectionsOnDashboard bool
+               ShowUserAgreementInline          bool
+               ShowUserNotifications            bool
+               SiteName                         string
+               Theme                            string
+               UserProfileFormFields            map[string]struct {
+                       Type                 string
+                       FormFieldTitle       string
+                       FormFieldDescription string
+                       Required             bool
+               }
+               UserProfileFormMessage string
+               VocabularyURL          string
+       }
 }
 
 type Services struct {
-       Controller    Service
-       DispatchCloud Service
-       Health        Service
-       Keepbalance   Service
-       Keepproxy     Service
-       Keepstore     Service
-       Nodemanager   Service
-       RailsAPI      Service
-       WebDAV        Service
-       Websocket     Service
-       Workbench1    Service
-       Workbench2    Service
+       Composer       Service
+       Controller     Service
+       DispatchCloud  Service
+       GitHTTP        Service
+       GitSSH         Service
+       Health         Service
+       Keepbalance    Service
+       Keepproxy      Service
+       Keepstore      Service
+       Nodemanager    Service
+       RailsAPI       Service
+       SSO            Service
+       WebDAVDownload Service
+       WebDAV         Service
+       WebShell       Service
+       Websocket      Service
+       Workbench1     Service
+       Workbench2     Service
 }
 
 type Service struct {
-       InternalURLs map[URL]ServiceInstance `json:",omitempty"`
+       InternalURLs map[URL]ServiceInstance
        ExternalURL  URL
 }
 
@@ -109,12 +205,6 @@ func (su URL) MarshalText() ([]byte, error) {
 
 type ServiceInstance struct{}
 
-type SystemLogs struct {
-       LogLevel                string
-       Format                  string
-       MaxRequestLogParamsSize int
-}
-
 type PostgreSQL struct {
        Connection     PostgreSQLConnection
        ConnectionPool int
@@ -123,15 +213,11 @@ type PostgreSQL struct {
 type PostgreSQLConnection map[string]string
 
 type RemoteCluster struct {
-       // API endpoint host or host:port; default is {id}.arvadosapi.com
-       Host string
-       // Perform a proxy request when a local client requests an
-       // object belonging to this remote.
-       Proxy bool
-       // Scheme, default "https". Can be set to "http" for testing.
-       Scheme string
-       // Disable TLS verify. Can be set to true for testing.
-       Insecure bool
+       Host          string // API endpoint host or host:port; default is {id}.arvadosapi.com
+       Proxy         bool // Perform a proxy request when a local client requests an object belonging to this remote.
+       Scheme        string // Scheme, default "https". Can be set to "http" for testing.
+       Insecure      bool // Disable TLS verify. Can be set to true for testing.
+       ActivateUsers bool // NOTE(review): presumably controls whether users from this remote cluster are activated automatically -- confirm against the code that reads this field
 }
 
 type InstanceType struct {
@@ -147,9 +233,49 @@ type InstanceType struct {
 }
 
 type ContainersConfig struct {
-       CloudVMs           CloudVMsConfig
-       DispatchPrivateKey string
-       StaleLockTimeout   Duration
+       CloudVMs                    CloudVMsConfig
+       DefaultKeepCacheRAM         ByteSize
+       DispatchPrivateKey          string
+       LogReuseDecisions           bool
+       MaxComputeVMs               int
+       MaxDispatchAttempts         int
+       MaxRetryAttempts            int
+       StaleLockTimeout            Duration
+       SupportedDockerImageFormats []string
+       UsePreemptibleInstances     bool
+
+       JobsAPI struct {
+               Enable                  string
+               GitInternalDir          string
+               DefaultDockerImage      string
+               CrunchJobWrapper        string
+               CrunchJobUser           string
+               CrunchRefreshTrigger    string
+               ReuseJobIfOutputsDiffer bool
+       }
+       Logging struct {
+               MaxAge                       Duration
+               LogBytesPerEvent             int
+               LogSecondsBetweenEvents      int
+               LogThrottlePeriod            Duration
+               LogThrottleBytes             int
+               LogThrottleLines             int
+               LimitLogBytesPerJob          int
+               LogPartialLineThrottlePeriod Duration
+               LogUpdatePeriod              Duration
+               LogUpdateSize                ByteSize
+       }
+       SLURM struct {
+               Managed struct {
+                       DNSServerConfDir       string
+                       DNSServerConfTemplate  string
+                       DNSServerReloadCommand string
+                       DNSServerUpdateCommand string
+                       ComputeNodeDomain      string
+                       ComputeNodeNameservers []string
+                       AssignNodeHostname     string
+               }
+       }
 }
 
 type CloudVMsConfig struct {
@@ -269,9 +395,3 @@ func (svcs Services) Map() map[ServiceName]Service {
                ServiceNameKeepstore:     svcs.Keepstore,
        }
 }
-
-type TLS struct {
-       Certificate string
-       Key         string
-       Insecure    bool
-}
index 2696fdb051146ca34bd311e7e29e1092b0a3723e..ee482fdf3150f6a2baf126d4d540f2479fe6ba69 100644 (file)
@@ -20,7 +20,9 @@ func (d *Duration) UnmarshalJSON(data []byte) error {
        if data[0] == '"' {
                return d.Set(string(data[1 : len(data)-1]))
        }
-       return fmt.Errorf("duration must be given as a string like \"600s\" or \"1h30m\"")
+       // Mimic error message returned by ParseDuration for a number
+       // without units.
+       return fmt.Errorf("missing unit in duration %s", data)
 }
 
 // MarshalJSON implements json.Marshaler.
index ee787a6a76a2807ebfce6211db28987250bbdd89..257a2b4ef54156d65b22bafb3152cc067de6cd13 100644 (file)
@@ -43,3 +43,20 @@ func (s *DurationSuite) TestMarshalJSON(c *check.C) {
                c.Check(string(buf), check.Equals, `"`+trial.out+`"`)
        }
 }
+
+// TestUnmarshalJSON checks that Duration rejects JSON numbers and
+// malformed duration strings with the expected error messages, and
+// accepts a well-formed duration string.
+func (s *DurationSuite) TestUnmarshalJSON(c *check.C) {
+       var d struct {
+               D Duration
+       }
+       // Inputs that must be rejected, paired with the pattern the
+       // resulting error has to match.
+       for _, trial := range []struct {
+               input   string
+               pattern string
+       }{
+               {`{"D":1.234}`, `missing unit in duration 1.234`},
+               {`{"D":"1.234"}`, `.*missing unit in duration 1.234`},
+               {`{"D":"1"}`, `.*missing unit in duration 1`},
+               {`{"D":"foobar"}`, `.*invalid duration foobar`},
+       } {
+               err := json.Unmarshal([]byte(trial.input), &d)
+               c.Check(err, check.ErrorMatches, trial.pattern)
+       }
+       // A string with a unit is accepted and parsed.
+       err := json.Unmarshal([]byte(`{"D":"60s"}`), &d)
+       c.Check(err, check.IsNil)
+       c.Check(d.D.Duration(), check.Equals, time.Minute)
+}
index 2e6484cabdf1e71d39f5fe21139b29c2ce09ad93..692146465216448b7cb3f95fa8ce20bf8beff83c 100644 (file)
@@ -34,7 +34,7 @@ def get_version(setup_dir, module):
     else:
         try:
             save_version(setup_dir, module, git_latest_tag() + git_timestamp_tag())
-        except subprocess.CalledProcessError:
+        except (subprocess.CalledProcessError, OSError):
             pass
 
     return read_version(setup_dir, module)
index 90b6d9ddc7b4ab0f7ff38c74c42f70f751c3c69a..522aa73b0a0545a0cb9fa4b58184877581fb7752 100644 (file)
@@ -126,7 +126,7 @@ class ConfigLoader
         if cfg[k].is_a? Integer
           cfg[k] = cfg[k].seconds
         elsif cfg[k].is_a? String
-          cfg[k] = ConfigLoader.parse_duration cfg[k]
+          cfg[k] = ConfigLoader.parse_duration(cfg[k], cfgkey: cfgkey)
         end
       end
 
@@ -134,6 +134,31 @@ class ConfigLoader
         cfg[k] = URI(cfg[k])
       end
 
+      if cfgtype == Integer && cfg[k].is_a?(String)
+        v = cfg[k].sub(/B\s*$/, '')
+        if mt = /(-?\d*\.?\d+)\s*([KMGTPE]i?)$/.match(v)
+          if mt[1].index('.')
+            v = mt[1].to_f
+          else
+            v = mt[1].to_i
+          end
+          cfg[k] = v * {
+            'K' => 1000,
+            'Ki' => 1 << 10,
+            'M' => 1000000,
+            'Mi' => 1 << 20,
+           "G" =>  1000000000,
+           "Gi" => 1 << 30,
+           "T" =>  1000000000000,
+           "Ti" => 1 << 40,
+           "P" =>  1000000000000000,
+           "Pi" => 1 << 50,
+           "E" =>  1000000000000000000,
+           "Ei" => 1 << 60,
+          }[mt[2]]
+        end
+      end
+
       if !cfg[k].is_a? cfgtype
         raise "#{cfgkey} expected #{cfgtype} but was #{cfg[k].class}"
       end
@@ -155,13 +180,13 @@ class ConfigLoader
     end
   end
 
-  def self.parse_duration durstr
-    duration_re = /(\d+(\.\d+)?)(s|m|h)/
+  def self.parse_duration durstr, cfgkey:
+    duration_re = /-?(\d+(\.\d+)?)(s|m|h)/
     dursec = 0
     while durstr != ""
       mt = duration_re.match durstr
       if !mt
-        raise "#{cfgkey} not a valid duration: '#{cfg[k]}', accepted suffixes are s, m, h"
+        raise "#{cfgkey} not a valid duration: '#{durstr}', accepted suffixes are s, m, h"
       end
       multiplier = {s: 1, m: 60, h: 3600}
       dursec += (Float(mt[1]) * multiplier[mt[3].to_sym])
index 5c5d45f4bc0c5a880775d638bc133752992a454d..f18adb5dbd7d1ad56c2575984a668240e21479a6 100644 (file)
@@ -72,6 +72,7 @@ locked:
   priority: 2
   created_at: <%= 2.minute.ago.to_s(:db) %>
   updated_at: <%= 2.minute.ago.to_s(:db) %>
+  modified_at: <%= 2.minute.ago.to_s(:db) %>
   container_image: test
   cwd: test
   output_path: test
index 3e829522af24de67e134166e8dc227b2ba7b9b61..efcad7f44c976e5118027e87e4e4d8d0ac100f5d 100755 (executable)
@@ -275,6 +275,11 @@ run() {
                        /var/lib/arvbox/service/api/run-service --only-setup
             fi
 
+           interactive=""
+           if [[ -z "$@" ]] ; then
+               interactive=--interactive
+           fi
+
             docker exec -ti \
                    -e LINES=$(tput lines) \
                    -e COLUMNS=$(tput cols) \
@@ -285,6 +290,7 @@ run() {
                    /usr/local/lib/arvbox/runsu.sh \
                    /usr/src/arvados/build/run-tests.sh \
                    --temp /var/lib/arvados/test \
+                  $interactive \
                    "$@"
         elif [[ "$CONFIG" = devenv ]] ; then
            if [[ $need_setup = 1 ]] ; then