SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Metrics endpoints are found at @/status.json@ on many Arvados services. The purpose of metrics are to provide statistics about the operation of a service, suitable for diagnosing how well a service is performing under load.
+Some Arvados services publish Prometheus/OpenMetrics-compatible metrics at @/metrics@, and some provide additional runtime status at @/status.json@. Metrics can help you understand how components perform under load, find performance bottlenecks, and detect and diagnose problems.
-To access metrics endpoints, services must be configured with a "management token":management-token.html .
+To access metrics endpoints, services must be configured with a "management token":management-token.html. When accessing a metrics endpoint, prefix the management token with @"Bearer "@ and supply it in the @Authorization@ request header.
+
+<pre>curl -sfH "Authorization: Bearer your_management_token_goes_here" "https://0.0.0.0:25107/status.json"
+</pre>
+
+h2. Keep-web
+
+Keep-web exports metrics at @/metrics@ -- e.g., @https://collections.zzzzz.arvadosapi.com/metrics@.
+
+table(table table-bordered table-condensed).
+|_. Name|_. Type|_. Description|
+|request_duration_seconds|summary|elapsed time between receiving a request and sending the last byte of the response body (segmented by HTTP request method and response status code)|
+|time_to_status_seconds|summary|elapsed time between receiving a request and sending the HTTP response status code (segmented by HTTP request method and response status code)|
+
+Metrics in the @arvados_keepweb_collectioncache@ namespace report keep-web's internal cache of Arvados collection metadata.
+
+table(table table-bordered table-condensed).
+|_. Name|_. Type|_. Description|
+|arvados_keepweb_collectioncache_requests|counter|cache lookups|
+|arvados_keepweb_collectioncache_api_calls|counter|outgoing API calls|
+|arvados_keepweb_collectioncache_permission_hits|counter|collection-to-permission cache hits|
+|arvados_keepweb_collectioncache_pdh_hits|counter|UUID-to-PDH cache hits|
+|arvados_keepweb_collectioncache_hits|counter|PDH-to-manifest cache hits|
+|arvados_keepweb_collectioncache_cached_manifests|gauge|number of collections in the cache|
+|arvados_keepweb_collectioncache_cached_manifest_bytes|gauge|memory consumed by cached collection manifests|
h2. Keepstore
+Keepstore exports metrics at @/status.json@ -- e.g., @http://keep0.zzzzz.arvadosapi.com:25107/status.json@.
+
h3. Root
table(table table-bordered table-condensed).
TODO: extract this information based on git commit messages and generate changelogs / release notes automatically.
{% endcomment %}
-h3. 2018-08-09: "21bf21abe":https://dev.arvados.org/projects/arvados/repository/revisions/21bf21abe includes a slow migration
-* The migration removes the size constraint on the file_names column in the collections table, and reparses all manifests to fix any truncated values.
-* Migration time depends on the size and number of collections in the database. Our test clusters processed about 20K collections per minute.
-
h3. 2018-07-31: "#13497":https://dev.arvados.org/issues/13497 "db5107dca":https://dev.arvados.org/projects/arvados/repository/revisions/db5107dca adds a new system service, arvados-controller
* "Install the controller":../install/install-controller.html after upgrading your system.
* Verify your setup by confirming that API calls appear in the controller's logs (_e.g._, @journalctl -fu arvados-controller@) while loading a workbench page.
type Dispatcher struct {
Arv *arvadosclient.ArvadosClient
+ // Batch size for container queries
+ BatchSize int64
+
// Queue polling frequency
PollPeriod time.Duration
poll := time.NewTicker(d.PollPeriod)
defer poll.Stop()
+ if d.BatchSize == 0 {
+ d.BatchSize = 100
+ }
+
for {
select {
case <-poll.C:
}
func (d *Dispatcher) checkForUpdates(filters [][]interface{}, todo map[string]*runTracker) bool {
+ var countList arvados.ContainerList
params := arvadosclient.Dict{
"filters": filters,
+ "count": "exact",
+ "limit": 0,
+ "order": []string{"priority desc"}}
+ err := d.Arv.List("containers", params, &countList)
+ if err != nil {
+ log.Printf("error getting count of containers: %q", err)
+ return false
+ }
+ itemsAvailable := countList.ItemsAvailable
+ params = arvadosclient.Dict{
+ "filters": filters,
+ "count": "none",
+ "limit": d.BatchSize,
"order": []string{"priority desc"}}
offset := 0
for {
}
d.checkListForUpdates(list.Items, todo)
offset += len(list.Items)
- if len(list.Items) == 0 || list.ItemsAvailable <= offset {
+ if len(list.Items) == 0 || itemsAvailable <= offset {
return true
}
}
class IndexAllFilenames < ActiveRecord::Migration
def up
ActiveRecord::Base.connection.execute 'ALTER TABLE collections ALTER COLUMN file_names TYPE text'
- Collection.find_each(batch_size: 20) do |c|
- ActiveRecord::Base.connection.execute "UPDATE collections
- SET file_names = #{ActiveRecord::Base.connection.quote(c.manifest_files)}
- WHERE uuid = #{ActiveRecord::Base.connection.quote(c.uuid)}
- AND portable_data_hash = #{ActiveRecord::Base.connection.quote(c.portable_data_hash)}"
- end
end
def down
ActiveRecord::Base.connection.execute 'ALTER TABLE collections ALTER COLUMN file_names TYPE varchar(8192)'
apiStubResponses := make(map[string]arvadostest.StubResponse)
apiStubResponses["/arvados/v1/containers"] = arvadostest.StubResponse{500, string(`{}`)}
- testWithServerStub(c, apiStubResponses, "echo", "Error getting list of containers")
+ testWithServerStub(c, apiStubResponses, "echo", "error getting count of containers")
}
func (s *MockArvadosServerSuite) Test_APIErrorUpdatingContainerState(c *C) {
// Minimum time between two attempts to run the same container
MinRetryPeriod arvados.Duration
+
+ // Batch size for container queries
+ BatchSize int64
}
func main() {
}
disp.Dispatcher = &dispatch.Dispatcher{
Arv: arv,
+ BatchSize: disp.BatchSize,
RunContainer: disp.runContainer,
PollPeriod: time.Duration(disp.PollPeriod),
MinRetryPeriod: time.Duration(disp.MinRetryPeriod),
apiStubResponses["/arvados/v1/api_client_authorizations/current"] = arvadostest.StubResponse{200, `{"uuid":"` + arvadostest.Dispatch1AuthUUID + `"}`}
apiStubResponses["/arvados/v1/containers"] = arvadostest.StubResponse{500, string(`{}`)}
- s.testWithServerStub(c, apiStubResponses, "echo", "Error getting list of containers")
+ s.testWithServerStub(c, apiStubResponses, "echo", "error getting count of containers")
}
func (s *StubbedSuite) testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubResponse, crunchCmd string, expected string) {
"PollPeriod": "10s",
"SbatchArguments": ["--partition=foo", "--exclude=node13"],
"ReserveExtraRAM": 268435456,
+ "BatchSize": 10000
}`)
func usage(fs *flag.FlagSet) {