end
def render_markup(markup)
- raw RedCloth.new(markup.to_s).to_html(:refs_arvados, :textile) if markup
+ sanitize(raw(RedCloth.new(markup.to_s).to_html(:refs_arvados, :textile))) if markup
end
def human_readable_bytes_html(n)
end
return h(n)
- #raw = n.to_s
- #cooked = ''
- #while raw.length > 3
- # cooked = ',' + raw[-3..-1] + cooked
- # raw = raw[0..-4]
- #end
- #cooked = raw + cooked
end
def resource_class_for_uuid(attrvalue, opts={})
end
# Keep locators are expected to be of the form \"...<pdh/file_path>\"
- JSON_KEEP_LOCATOR_REGEXP = /(.*)(([0-9a-f]{32}\+\d+)(.*)\"(.*))/
+ JSON_KEEP_LOCATOR_REGEXP = /([0-9a-f]{32}\+\d+[^'"]*?)(?=['"]|\z|$)/
def keep_locator_in_json str
- JSON_KEEP_LOCATOR_REGEXP.match str
+ # Return a list of all matches
+ str.scan(JSON_KEEP_LOCATOR_REGEXP).flatten
end
private
<% data_height = data_height || 100 %>
+ <%# Renders text_data one line at a time inside a scrollable box of at most data_height px (default 100). %>
+ <%# Every Keep locator found by keep_locator_in_json whose collection passes object_readable is replaced %>
+ <%# by a link to the collection, plus a link to the file when the locator has a "/file" part. %>
+ <%# All other text, including locators that are not readable, is HTML-escaped before output. %>
<div style="max-height:<%=data_height%>px; overflow:auto;">
- <% text_data.each_line do |l| %>
- <% text_part = l %>
- <% match = keep_locator_in_json l %>
+ <% text_data.each_line do |line| %>
+ <% matches = keep_locator_in_json line %>
- <%
- if match
- text_part = match[1]
- rindex = match[2].rindex('"'); match2 = match[2][0..rindex-1]
- quote_char = '"'
+ <% if matches.nil? || matches.empty? %>
+ <span style="white-space: pre-wrap; margin: 0;"><%= line %></span>
+ <% else
+ # Maps each readable locator string to the link markup that replaces it
+ subs = {}
+ matches.uniq.each do |loc|
+ pdh, filename = loc.split('/', 2)
- pdh_readable = object_readable(match2)
- file_link = ''
- if pdh_readable and match[4].size > 0
- link_params = {controller: 'collections', action: 'show_file', uuid: match[3], file: match[4][1..-1]}
- preview_allowed = preview_allowed_for(match[4])
- if preview_allowed
- file_link = link_to(raw(match[4]), link_params.merge(disposition: 'inline'))
- else
- file_link = link_to(raw(match[4]), link_params.merge(disposition: 'attachment'))
+ if object_readable(pdh)
+ # Add PDH link
+ replacement = link_to_arvados_object_if_readable(pdh, pdh, friendly_name: true)
+ if filename
+ # Use disposition 'inline' when preview_allowed_for(filename), otherwise 'attachment'
+ disposition = preview_allowed_for(filename) ? 'inline' : 'attachment'
+ link_params = {controller: 'collections', action: 'show_file', uuid: pdh, file: filename, disposition: disposition}
+ # Add file link. The label is a plain string (not raw) so that link_to
+ # escapes any HTML metacharacters that appear in the file name.
+ replacement << link_to("/" + filename, link_params)
end
+ # Remember the markup to splice in for this locator
+ subs[loc] = replacement
end
end
- %>
-
- <span style="white-space: pre-wrap; margin: none;"><%= text_part %><% if match %><% if pdh_readable then %><%= link_to_arvados_object_if_readable(match[3], match[3], friendly_name: true) %><%= file_link%><% else %><%= match2%><% end %><%=quote_char+match[5]%><br/><% end %></span>
+ # Splice the links into the line in a single left-to-right pass.
+ # Longer locators are tried first, so "<pdh>/foo" can neither match
+ # inside "<pdh>/foobar" nor inside the markup of a link that has
+ # already been inserted. Appending a plain String to a SafeBuffer
+ # HTML-escapes it, so the text around the links is never emitted raw.
+ # With no readable locators the pattern matches nothing and the whole
+ # line is simply escaped.
+ locator_re = Regexp.union(subs.keys.sort_by { |loc| -loc.length })
+ html = ActiveSupport::SafeBuffer.new
+ rest = line
+ while (found = locator_re.match(rest))
+ html << found.pre_match
+ html << subs[found[0]]
+ rest = found.post_match
+ end
+ html << rest %>
+ <span style="white-space: pre-wrap; margin: 0;"><%= html %></span>
+ <% end %>
<% end %>
</div>
assert_response :success
assert_match /hello/, @response.body
+ assert_includes @response.body, "href=\"\/collections/fa7aeb5140e2848d39b416daeef4ffc5+45/baz\?" # locator on command
+ assert_includes @response.body, "href=\"\/collections/fa7aeb5140e2848d39b416daeef4ffc5+45/foobar\?" # locator on command
assert_includes @response.body, "href=\"\/collections/fa7aeb5140e2848d39b416daeef4ffc5+45/foo" # mount input1
assert_includes @response.body, "href=\"\/collections/fa7aeb5140e2848d39b416daeef4ffc5+45/bar" # mount input2
assert_includes @response.body, "href=\"\/collections/1fd08fc162a5c6413070a8bd0bffc818+150" # mount workflow
project = api_fixture('groups')['aproject']
use_token :active
found = Group.find(project['uuid'])
- found.description = 'Textile description with link to home page <a href="/">take me home</a>.'
+ found.description = '<b>Textile</b> description with link to home page <a href="/">take me home</a>.'
found.save!
get(:show, {id: project['uuid']}, session_for(:active))
- assert_includes @response.body, 'Textile description with link to home page <a href="/">take me home</a>.'
+ assert_includes @response.body, '<b>Textile</b> description with link to home page <a href="/">take me home</a>.'
+ end
+
+ test "find a project and edit description to unsafe html description" do
+ project = api_fixture('groups')['aproject']
+ use_token :active
+ found = Group.find(project['uuid'])
+ found.description = 'Textile description with unsafe script tag <script language="javascript">alert("Hello there")</script>.'
+ found.save!
+ get(:show, {id: project['uuid']}, session_for(:active))
+ assert_includes @response.body, 'Textile description with unsafe script tag alert("Hello there").'
end
test "find a project and edit description to textile description with link to object" do
debian8,debian9,ubuntu1404,ubuntu1604,centos7|rdflib|4.2.2|2|python|all
debian8,debian9,ubuntu1404,centos7|shellescape|3.4.1|2|python|all
debian8,debian9,ubuntu1404,ubuntu1604,centos7|mistune|0.7.3|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,centos7|typing|3.6.2|2|python|all
+debian8,debian9,ubuntu1404,ubuntu1604,centos7|typing|3.6.4|2|python|all
debian8,debian9,ubuntu1404,ubuntu1604,centos7|avro|1.8.1|2|python|all
debian8,debian9,ubuntu1404,centos7|ruamel.ordereddict|0.4.9|2|python|amd64
debian8,debian9,ubuntu1404,ubuntu1604,centos7|cachecontrol|0.11.7|2|python|all
SPDX-License-Identifier: CC-BY-SA-3.0
{% endcomment %}
-Metrics endpoints are found at @/status.json@ on many Arvados services. The purpose of metrics are to provide statistics about the operation of a service, suitable for diagnosing how well a service is performing under load.
+Some Arvados services publish Prometheus/OpenMetrics-compatible metrics at @/metrics@, and some provide additional runtime status at @/status.json@. Metrics can help you understand how components perform under load, find performance bottlenecks, and detect and diagnose problems.
-To access metrics endpoints, services must be configured with a "management token":management-token.html .
+To access metrics endpoints, services must be configured with a "management token":management-token.html. When accessing a metrics endpoint, prefix the management token with @"Bearer "@ and supply it in the @Authorization@ request header.
+
+<pre>curl -sfH "Authorization: Bearer your_management_token_goes_here" "https://0.0.0.0:25107/status.json"
+</pre>
+
+h2. Keep-web
+
+Keep-web exports metrics at @/metrics@ -- e.g., @https://collections.zzzzz.arvadosapi.com/metrics@.
+
+table(table table-bordered table-condensed).
+|_. Name|_. Type|_. Description|
+|request_duration_seconds|summary|elapsed time between receiving a request and sending the last byte of the response body (segmented by HTTP request method and response status code)|
+|time_to_status_seconds|summary|elapsed time between receiving a request and sending the HTTP response status code (segmented by HTTP request method and response status code)|
+
+Metrics in the @arvados_keepweb_collectioncache@ namespace report keep-web's internal cache of Arvados collection metadata.
+
+table(table table-bordered table-condensed).
+|_. Name|_. Type|_. Description|
+|arvados_keepweb_collectioncache_requests|counter|cache lookups|
+|arvados_keepweb_collectioncache_api_calls|counter|outgoing API calls|
+|arvados_keepweb_collectioncache_permission_hits|counter|collection-to-permission cache hits|
+|arvados_keepweb_collectioncache_pdh_hits|counter|UUID-to-PDH cache hits|
+|arvados_keepweb_collectioncache_hits|counter|PDH-to-manifest cache hits|
+|arvados_keepweb_collectioncache_cached_manifests|gauge|number of collections in the cache|
+|arvados_keepweb_collectioncache_cached_manifest_bytes|gauge|memory consumed by cached collection manifests|
h2. Keepstore
+Keepstore exports metrics at @/status.json@ -- e.g., @http://keep0.zzzzz.arvadosapi.com:25107/status.json@.
+
h3. Root
table(table table-bordered table-condensed).
self.uuid = response["uuid"]
self.arvrunner.process_submitted(self)
- logger.info("%s submitted container %s", self.arvrunner.label(self), response["uuid"])
+ logger.info("%s submitted container_request %s", self.arvrunner.label(self), response["uuid"])
def done(self, record):
try:
p = sp[0]
if p.startswith("keep:") and arvados.util.keep_locator_pattern.match(p[5:]):
pdh = p[5:]
- return (self.collection_cache.get(pdh), sp[1] if len(sp) == 2 else None)
+ return (self.collection_cache.get(pdh), urlparse.unquote(sp[1]) if len(sp) == 2 else None)
else:
return (None, path)
sc = []
def only_real(obj):
+ # Only interested in local files that need to be uploaded,
+ # don't include file literals, keep references, etc.
if obj.get("location", "").startswith("file:"):
sc.append(obj)
visit_class(workflowobj, ("CommandLineTool", "Workflow"), discover_default_secondary_files)
- for d in discovered:
- sc.extend(discovered[d])
+ for d in list(discovered.keys()):
+ # Only interested in discovered secondaryFiles which are local
+ # files that need to be uploaded.
+ if d.startswith("file:"):
+ sc.extend(discovered[d])
+ else:
+ del discovered[d]
mapper = ArvPathMapper(arvrunner, sc, "",
"keep:%s",
install_requires=[
'cwltool==1.0.20180806194258',
'schema-salad==2.7.20180719125426',
- 'typing >= 3.5.3',
+ 'typing >= 3.6.4',
'ruamel.yaml >=0.13.11, <0.15',
'arvados-python-client>=1.1.4.20180607143841',
'setuptools',
--- /dev/null
+cwlVersion: v1.0
+class: CommandLineTool
+requirements:
+ - class: InlineJavascriptRequirement
+arguments:
+ - ls
+ - -l
+ - $(inputs.hello)
+inputs:
+ hello:
+ type: File
+ default:
+ class: File
+ location: keep:4d8a70b1e63b2aad6984e40e338e2373+69/hello.txt
+ secondaryFiles:
+ - .idx
+outputs: []
\ No newline at end of file
tool: 12418-glob-empty-collection.cwl
doc: "Test glob output on empty collection"
+- job: null
+ output:
+ out: null
+ tool: 13976-keepref-wf.cwl
+ doc: "Test issue 13976"
+
- job: null
output:
out: out
type Dispatcher struct {
Arv *arvadosclient.ArvadosClient
+ // Batch size for container queries
+ BatchSize int64
+
// Queue polling frequency
PollPeriod time.Duration
poll := time.NewTicker(d.PollPeriod)
defer poll.Stop()
+ if d.BatchSize == 0 {
+ d.BatchSize = 100
+ }
+
for {
select {
case <-poll.C:
}
func (d *Dispatcher) checkForUpdates(filters [][]interface{}, todo map[string]*runTracker) bool {
+ var countList arvados.ContainerList
params := arvadosclient.Dict{
"filters": filters,
+ "count": "exact",
+ "limit": 0,
+ "order": []string{"priority desc"}}
+ err := d.Arv.List("containers", params, &countList)
+ if err != nil {
+ log.Printf("error getting count of containers: %q", err)
+ return false
+ }
+ itemsAvailable := countList.ItemsAvailable
+ params = arvadosclient.Dict{
+ "filters": filters,
+ "count": "none",
+ "limit": d.BatchSize,
"order": []string{"priority desc"}}
offset := 0
for {
}
d.checkListForUpdates(list.Items, todo)
offset += len(list.Items)
- if len(list.Items) == 0 || list.ItemsAvailable <= offset {
+ if len(list.Items) == 0 || itemsAvailable <= offset {
return true
}
}
req_id = "req-" + Random::DEFAULT.rand(2**128).to_s(36)[0..19]
end
response.headers['X-Request-Id'] = Thread.current[:request_id] = req_id
- yield
+ Rails.logger.tagged(req_id) do
+ yield
+ end
Thread.current[:request_id] = nil
end
all_objects = []
@items_available = 0
+ # Reload the orders param, this time without prefixing unqualified
+ # columns ("name" => "groups.name"). Here, unqualified orders
+ # apply to each table being searched, not "groups".
+ load_limit_offset_order_params(fill_table_names: false)
+
# Trick apply_where_limit_order_params into applying suitable
# per-table values. *_all are the real ones we'll apply to the
# aggregate set.
# table_name for the current klass, apply that order.
# Otherwise, order by recency.
request_order =
- request_orders.andand.find { |r| r =~ /^#{klass.table_name}\./i } ||
+ request_orders.andand.find { |r| r =~ /^#{klass.table_name}\./i || r !~ /\./ } ||
klass.default_orders.join(", ")
@select = nil
class IndexAllFilenames < ActiveRecord::Migration
def up
ActiveRecord::Base.connection.execute 'ALTER TABLE collections ALTER COLUMN file_names TYPE text'
- Collection.find_each(batch_size: 20) do |c|
- ActiveRecord::Base.connection.execute "UPDATE collections
- SET file_names = #{ActiveRecord::Base.connection.quote(c.manifest_files)}
- WHERE uuid = #{ActiveRecord::Base.connection.quote(c.uuid)}
- AND portable_data_hash = #{ActiveRecord::Base.connection.quote(c.portable_data_hash)}"
- end
end
def down
ActiveRecord::Base.connection.execute 'ALTER TABLE collections ALTER COLUMN file_names TYPE varchar(8192)'
# Load params[:limit], params[:offset] and params[:order]
# into @limit, @offset, @orders
- def load_limit_offset_order_params
+ def load_limit_offset_order_params(fill_table_names: true)
if params[:limit]
unless params[:limit].to_s.match(/^\d+$/)
raise ArgumentError.new("Invalid value for limit parameter")
# has used set_table_name to use an alternate table name from the Rails standard.
# I could not find a perfect way to handle this well, but ActiveRecord::Base.send(:descendants)
# would be a place to start if this ever becomes necessary.
- if attr.match(/^[a-z][_a-z0-9]+$/) and
- model_class.columns.collect(&:name).index(attr) and
- ['asc','desc'].index direction.downcase
- @orders << "#{table_name}.#{attr} #{direction.downcase}"
+ if (attr.match(/^[a-z][_a-z0-9]+$/) &&
+ model_class.columns.collect(&:name).index(attr) &&
+ ['asc','desc'].index(direction.downcase))
+ if fill_table_names
+ @orders << "#{table_name}.#{attr} #{direction.downcase}"
+ else
+ @orders << "#{attr} #{direction.downcase}"
+ end
elsif attr.match(/^([a-z][_a-z0-9]+)\.([a-z][_a-z0-9]+)$/) and
['asc','desc'].index(direction.downcase) and
ActiveRecord::Base.connection.tables.include?($1) and
container_image: test
cwd: test
output_path: test
- command: ["echo", "hello"]
+ command: ["echo", "hello", "/bin/sh", "-c", "'cat' '/keep/fa7aeb5140e2848d39b416daeef4ffc5+45/foobar' '/keep/fa7aeb5140e2848d39b416daeef4ffc5+45/baz' '|' 'gzip' '>' '/dev/null'"]
runtime_constraints:
vcpus: 1
ram: 123
log: ea10d51bcf88862dbcc36eb292017dfd+45
output: 1f4b0bc7583c2a7f9102c395f4ffc5e3+45
output_path: test
- command: ["echo", "hello"]
+ command: ["echo", "hello", "/bin/sh", "-c", "'cat' '/keep/fa7aeb5140e2848d39b416daeef4ffc5+45/foobar' '/keep/fa7aeb5140e2848d39b416daeef4ffc5+45/baz' '|' 'gzip' '>' '/dev/null'"]
runtime_constraints:
ram: 12000000000
vcpus: 4
assert_includes ids, collections(:baz_file_in_asubproject).uuid
end
- [['asc', :<=],
- ['desc', :>=]].each do |order, operator|
- test "user with project read permission can sort project collections #{order}" do
+ [
+ ['collections.name', 'asc', :<=, "name"],
+ ['collections.name', 'desc', :>=, "name"],
+ ['name', 'asc', :<=, "name"],
+ ['name', 'desc', :>=, "name"],
+ ['collections.created_at', 'asc', :<=, "created_at"],
+ ['collections.created_at', 'desc', :>=, "created_at"],
+ ['created_at', 'asc', :<=, "created_at"],
+ ['created_at', 'desc', :>=, "created_at"],
+ ].each do |column, order, operator, field|
+ test "user with project read permission can sort projects on #{column} #{order}" do
authorize_with :project_viewer
get :contents, {
id: groups(:asubproject).uuid,
format: :json,
filters: [['uuid', 'is_a', "arvados#collection"]],
- order: "collections.name #{order}"
+ order: "#{column} #{order}"
}
- sorted_names = json_response['items'].collect { |item| item["name"] }
- # Here we avoid assuming too much about the database
- # collation. Both "alice"<"Bob" and "alice">"Bob" can be
- # correct. Hopefully it _is_ safe to assume that if "a" comes
- # before "b" in the ascii alphabet, "aX">"bY" is never true for
- # any strings X and Y.
- reliably_sortable_names = sorted_names.select do |name|
- name[0] >= 'a' and name[0] <= 'z'
- end.uniq do |name|
- name[0]
- end
- # Preserve order of sorted_names. But do not use &=. If
- # sorted_names has out-of-order duplicates, we want to preserve
- # them here, so we can detect them and fail the test below.
- sorted_names.select! do |name|
- reliably_sortable_names.include? name
- end
- actually_checked_anything = false
- previous = nil
- sorted_names.each do |entry|
- if previous
- assert_operator(previous, operator, entry,
- "Entries sorted incorrectly.")
- actually_checked_anything = true
+ sorted_values = json_response['items'].collect { |item| item[field] }
+ if field == "name"
+ # Here we avoid assuming too much about the database
+ # collation. Both "alice"<"Bob" and "alice">"Bob" can be
+ # correct. Hopefully it _is_ safe to assume that if "a" comes
+ # before "b" in the ascii alphabet, "aX">"bY" is never true for
+ # any strings X and Y.
+ reliably_sortable_names = sorted_values.select do |name|
+ name[0] >= 'a' && name[0] <= 'z'
+ end.uniq do |name|
+ name[0]
+ end
+ # Preserve order of sorted_values. But do not use &=. If
+ # sorted_values has out-of-order duplicates, we want to preserve
+ # them here, so we can detect them and fail the test below.
+ sorted_values.select! do |name|
+ reliably_sortable_names.include? name
end
- previous = entry
end
- assert actually_checked_anything, "Didn't even find two names to compare."
+ assert_sorted(operator, sorted_values)
+ end
+ end
+
+ def assert_sorted(operator, sorted_items)
+ actually_checked_anything = false
+ previous = nil
+ sorted_items.each do |entry|
+ if !previous.nil?
+ assert_operator(previous, operator, entry,
+ "Entries sorted incorrectly.")
+ actually_checked_anything = true
+ end
+ previous = entry
end
+ assert actually_checked_anything, "Didn't even find two items to compare."
end
test 'list objects across multiple projects' do
apiStubResponses := make(map[string]arvadostest.StubResponse)
apiStubResponses["/arvados/v1/containers"] = arvadostest.StubResponse{500, string(`{}`)}
- testWithServerStub(c, apiStubResponses, "echo", "Error getting list of containers")
+ testWithServerStub(c, apiStubResponses, "echo", "error getting count of containers")
}
func (s *MockArvadosServerSuite) Test_APIErrorUpdatingContainerState(c *C) {
// Minimum time between two attempts to run the same container
MinRetryPeriod arvados.Duration
+
+ // Batch size for container queries
+ BatchSize int64
}
func main() {
}
disp.Dispatcher = &dispatch.Dispatcher{
Arv: arv,
+ BatchSize: disp.BatchSize,
RunContainer: disp.runContainer,
PollPeriod: time.Duration(disp.PollPeriod),
MinRetryPeriod: time.Duration(disp.MinRetryPeriod),
apiStubResponses["/arvados/v1/api_client_authorizations/current"] = arvadostest.StubResponse{200, `{"uuid":"` + arvadostest.Dispatch1AuthUUID + `"}`}
apiStubResponses["/arvados/v1/containers"] = arvadostest.StubResponse{500, string(`{}`)}
- s.testWithServerStub(c, apiStubResponses, "echo", "Error getting list of containers")
+ s.testWithServerStub(c, apiStubResponses, "echo", "error getting count of containers")
}
func (s *StubbedSuite) testWithServerStub(c *C, apiStubResponses map[string]arvadostest.StubResponse, crunchCmd string, expected string) {
// adjust slurm job nice values as needed to ensure slurm priority
// order matches Arvados priority order.
func (sqc *SqueueChecker) reniceAll() {
- sqc.lock.RLock()
- defer sqc.lock.RUnlock()
-
+ // This is slow (it shells out to scontrol many times) and no
+ // other goroutines update sqc.queue or any of the job fields
+ // we use here, so we don't acquire a lock.
jobs := make([]*slurmJob, 0, len(sqc.queue))
for _, j := range sqc.queue {
if j.wantPriority == 0 {
// queued). If it succeeds, it updates sqc.queue and wakes up any
// goroutines that are waiting in HasUUID() or All().
func (sqc *SqueueChecker) check() {
- sqc.lock.Lock()
- defer sqc.lock.Unlock()
-
cmd := sqc.Slurm.QueueCommand([]string{"--all", "--noheader", "--format=%j %y %Q %T %r"})
stdout, stderr := &bytes.Buffer{}, &bytes.Buffer{}
cmd.Stdout, cmd.Stderr = stdout, stderr
log.Printf("warning: ignoring unparsed line in squeue output: %q", line)
continue
}
+
+ // No other goroutines write to jobs' priority or nice
+ // fields, so we can read and write them without
+ // locks.
replacing, ok := sqc.queue[uuid]
if !ok {
replacing = &slurmJob{uuid: uuid}
log.Printf("warning: job %q has low priority %d, nice %d, state %q, reason %q", uuid, p, n, state, reason)
}
}
+ sqc.lock.Lock()
sqc.queue = newq
+ sqc.lock.Unlock()
sqc.notify.Broadcast()
}
"PollPeriod": "10s",
"SbatchArguments": ["--partition=foo", "--exclude=node13"],
"ReserveExtraRAM": 268435456,
+ "BatchSize": 10000
}`)
func usage(fs *flag.FlagSet) {