end
end
if resource_class == Collection
- dst.manifest_text = Collection.select([:manifest_text]).where(uuid: src.uuid).first.manifest_text
+ dst.manifest_text = Collection.select([:manifest_text]).where(uuid: src.uuid).with_count("none").first.manifest_text
end
when :move
dst = src
uuids, source_paths = selected_collection_files params
new_coll = Arv::Collection.new
- Collection.where(uuid: uuids.uniq).
+ Collection.where(uuid: uuids.uniq).with_count("none").
select([:uuid, :manifest_text]).each do |coll|
src_coll = Arv::Collection.new(coll.manifest_text)
src_pathlist = source_paths[coll.uuid]
def is_starred
links = Link.where(tail_uuid: current_user.uuid,
head_uuid: @object.uuid,
- link_class: 'star')
+ link_class: 'star').with_count("none")
return links.andand.any?
end
@@notification_tests.push lambda { |controller, current_user|
return nil if Rails.configuration.Services.WebShell.ExternalURL != URI("")
- AuthorizedKey.limit(1).where(authorized_user_uuid: current_user.uuid).each do
+ AuthorizedKey.limit(1).with_count('none').where(authorized_user_uuid: current_user.uuid).each do
return nil
end
return lambda { |view|
}
@@notification_tests.push lambda { |controller, current_user|
- Collection.limit(1).where(created_by: current_user.uuid).each do
+ Collection.limit(1).with_count('none').where(created_by: current_user.uuid).each do
return nil
end
return lambda { |view|
@@notification_tests.push lambda { |controller, current_user|
if PipelineInstance.api_exists?(:index)
- PipelineInstance.limit(1).where(created_by: current_user.uuid).each do
+ PipelineInstance.limit(1).with_count('none').where(created_by: current_user.uuid).each do
return nil
end
else
helper_method :recent_jobs_and_pipelines
def recent_jobs_and_pipelines
(Job.limit(10) |
- PipelineInstance.limit(10)).
+ PipelineInstance.limit(10).with_count("none")).
sort_by do |x|
(x.finished_at || x.started_at rescue nil) || x.modified_at || x.created_at
end.reverse
helper_method :running_pipelines
def running_pipelines
- pi = PipelineInstance.order(["started_at asc", "created_at asc"]).filter([["state", "in", ["RunningOnServer", "RunningOnClient"]]])
+ pi = PipelineInstance.order(["started_at asc", "created_at asc"]).with_count("none").filter([["state", "in", ["RunningOnServer", "RunningOnClient"]]])
jobs = {}
pi.each do |pl|
pl.components.each do |k,v|
end
if jobs.keys.any?
- Job.filter([["uuid", "in", jobs.keys]]).each do |j|
+ Job.filter([["uuid", "in", jobs.keys]]).with_count("none").each do |j|
jobs[j[:uuid]] = j
end
procs = {}
if PipelineInstance.api_exists?(:index)
cols = %w(uuid owner_uuid created_at modified_at pipeline_template_uuid name state started_at finished_at)
- pipelines = PipelineInstance.select(cols).limit(lim).order(["created_at desc"])
+ pipelines = PipelineInstance.select(cols).limit(lim).order(["created_at desc"]).with_count("none")
pipelines.results.each { |pi| procs[pi] = pi.created_at }
end
- crs = ContainerRequest.limit(lim).order(["created_at desc"]).filter([["requesting_container_uuid", "=", nil]])
+ crs = ContainerRequest.limit(lim).with_count("none").order(["created_at desc"]).filter([["requesting_container_uuid", "=", nil]])
crs.results.each { |c| procs[c] = c.created_at }
Hash[procs.sort_by {|key, value| value}].keys.reverse.first(lim)
helper_method :recent_collections
def recent_collections lim
- c = Collection.limit(lim).order(["modified_at desc"]).results
+ c = Collection.limit(lim).with_count("none").order(["modified_at desc"]).results
own = {}
- Group.filter([["uuid", "in", c.map(&:owner_uuid)]]).each do |g|
+ Group.filter([["uuid", "in", c.map(&:owner_uuid)]]).with_count("none").each do |g|
own[g[:uuid]] = g
end
{collections: c, owners: own}
return if defined?(@starred_projects) && @starred_projects
links = Link.filter([['tail_uuid', '=', user.uuid],
['link_class', '=', 'star'],
- ['head_uuid', 'is_a', 'arvados#group']]).select(%w(head_uuid))
+ ['head_uuid', 'is_a', 'arvados#group']]).with_count("none").select(%w(head_uuid))
uuids = links.collect { |x| x.head_uuid }
- starred_projects = Group.filter([['uuid', 'in', uuids]]).order('name')
+ starred_projects = Group.filter([['uuid', 'in', uuids]]).order('name').with_count("none")
@starred_projects = starred_projects.results
end
end
# TODO: make sure we get every page of results from API server
- Link.filter([['head_uuid', 'in', uuids]]).each do |link|
+ Link.filter([['head_uuid', 'in', uuids]]).with_count("none").each do |link|
@all_links_for[link.head_uuid] << link
end
@all_links_for
end
# TODO: make sure we get every page of results from API server
- Collection.where(uuid: uuids).each do |collection|
+ Collection.where(uuid: uuids).with_count("none").each do |collection|
@all_collections_for[collection.uuid] << collection
end
@all_collections_for
end
# TODO: make sure we get every page of results from API server
- Collection.where(uuid: uuids).each do |collection|
+ Collection.where(uuid: uuids).with_count("none").each do |collection|
@all_log_collections_for[collection.uuid] << collection
end
@all_log_collections_for
@all_pdhs_for[x] = []
end
- Collection.select(%w(portable_data_hash)).where(portable_data_hash: pdhs).distinct().each do |collection|
+ Collection.select(%w(portable_data_hash)).where(portable_data_hash: pdhs).distinct().with_count("none").each do |collection|
@all_pdhs_for[collection.portable_data_hash] << collection
end
@all_pdhs_for
end
unless link_uuids.empty?
- Link.select([:head_uuid]).where(uuid: link_uuids).each do |link|
+ Link.select([:head_uuid]).where(uuid: link_uuids).with_count("none").each do |link|
if ArvadosBase::resource_class_for_uuid(link.head_uuid) == Collection
coll_ids << link.head_uuid
end
end
unless pdhs.empty?
- Collection.where(portable_data_hash: pdhs.uniq).
+ Collection.where(portable_data_hash: pdhs.uniq).with_count("none").
select([:uuid, :portable_data_hash]).each do |coll|
unless source_paths[coll.portable_data_hash].empty?
uuids << coll.uuid
['link_class', '=', 'resources'],
['name', '=', 'wants'],
['tail_uuid', '=', current_user.uuid],
- ['head_uuid', '=', @object.uuid]])
+ ['head_uuid', '=', @object.uuid]]).with_count("none")
logger.debug persist_links.inspect
else
return unprocessable "Invalid value #{value.inspect}"
@select ||= Collection.columns.map(&:name)
base_search = Collection.select(@select)
if params[:search].andand.length.andand > 0
- tags = Link.where(any: ['contains', params[:search]])
+ tags = Link.where(any: ['contains', params[:search]]).with_count("none")
@objects = (base_search.where(uuid: tags.collect(&:head_uuid)) |
base_search.where(any: ['contains', params[:search]])).
uniq { |c| c.uuid }
@objects = base_search.limit(limit).offset(offset)
end
- @links = Link.where(head_uuid: @objects.collect(&:uuid))
+ @links = Link.where(head_uuid: @objects.collect(&:uuid)).with_count("none")
@collection_info = {}
@objects.each do |c|
@collection_info[c.uuid] = {
else
if Job.api_exists?(:index)
jobs_with = lambda do |conds|
- Job.limit(RELATION_LIMIT).where(conds)
+ Job.limit(RELATION_LIMIT).with_count("none").where(conds)
.results.sort_by { |j| j.finished_at || j.created_at }
end
@output_of = jobs_with.call(output: @object.portable_data_hash)
@log_of = jobs_with.call(log: @object.portable_data_hash)
end
- @project_links = Link.limit(RELATION_LIMIT).order("modified_at DESC")
+ @project_links = Link.limit(RELATION_LIMIT).with_count("none").order("modified_at DESC")
.where(head_uuid: @object.uuid, link_class: 'name').results
- project_hash = Group.where(uuid: @project_links.map(&:tail_uuid)).to_hash
+ project_hash = Group.where(uuid: @project_links.map(&:tail_uuid)).with_count("none").to_hash
@projects = project_hash.values
- @permissions = Link.limit(RELATION_LIMIT).order("modified_at DESC")
+ @permissions = Link.limit(RELATION_LIMIT).with_count("none").order("modified_at DESC")
.where(head_uuid: @object.uuid, link_class: 'permission',
name: 'can_read').results
@search_sharing = search_scopes
# Search for child CRs
if cr[:container_uuid]
- child_crs = ContainerRequest.where(requesting_container_uuid: cr[:container_uuid])
+ child_crs = ContainerRequest.where(requesting_container_uuid: cr[:container_uuid]).with_count("none")
child_crs.each do |child|
nodes[child[:uuid]] = child
# Batch requests to get all related collections
# First fetch output collections by UUID.
- Collection.filter([['uuid', 'in', col_uuids.uniq]]).each do |c|
+ Collection.filter([['uuid', 'in', col_uuids.uniq]]).with_count("none").each do |c|
output_pdhs << c[:portable_data_hash]
pdh_to_col[c[:portable_data_hash]] = c
nodes[c[:uuid]] = c
end
# Next, get input collections by PDH.
Collection.filter(
- [['portable_data_hash', 'in', col_pdhs - output_pdhs]]).each do |c|
+ [['portable_data_hash', 'in', col_pdhs - output_pdhs]]).with_count("none").each do |c|
nodes[c[:portable_data_hash]] = c
end
def cancel
if @object.container_uuid
- c = Container.select(['state']).where(uuid: @object.container_uuid).first
+ c = Container.select(['state']).where(uuid: @object.container_uuid).with_count("none").first
if c && c.state != 'Running'
# If the container hasn't started yet, setting priority=0
# leaves our request in "Committed" state and doesn't cancel
def index
@groups = Group.filter [['group_class', '!=', 'project']]
@group_uuids = @groups.collect &:uuid
- @links_from = Link.where link_class: 'permission', tail_uuid: @group_uuids
- @links_to = Link.where link_class: 'permission', head_uuid: @group_uuids
+ @links_from = Link.where(link_class: 'permission', tail_uuid: @group_uuids).with_count("none")
+ @links_to = Link.where(link_class: 'permission', head_uuid: @group_uuids).with_count("none")
render_index
end
nodes[j[:script_version]] = {:uuid => j[:script_version]}
end
- Collection.where(uuid: collections).each do |c|
+ Collection.where(uuid: collections).with_count("none").each do |c|
nodes[c[:portable_data_hash]] = c
end
- Collection.where(portable_data_hash: hashes).each do |c|
+ Collection.where(portable_data_hash: hashes).with_count("none").each do |c|
nodes[c[:portable_data_hash]] = c
end
jobs = jobs.compact.uniq
if jobs.any?
- Job.where(uuid: jobs).each do |j|
+ Job.where(uuid: jobs).with_count("none").each do |j|
job_uuid = j.uuid
provenance[job_uuid] = j
hashes = hashes.compact.uniq
if hashes.any?
- Collection.where(portable_data_hash: hashes).each do |c|
+ Collection.where(portable_data_hash: hashes).with_count("none").each do |c|
hash_uuid = c.portable_data_hash
provenance[hash_uuid] = c
pips[hash_uuid] = 0 unless pips[hash_uuid] != nil
collections = collections.compact.uniq
if collections.any?
- Collection.where(uuid: collections).each do |c|
+ Collection.where(uuid: collections).with_count("none").each do |c|
collection_uuid = c.uuid
provenance[collection_uuid] = c
pips[collection_uuid] = 0 unless pips[collection_uuid] != nil
def destroy
while (objects = Link.filter([['owner_uuid','=',@object.uuid],
- ['tail_uuid','=',@object.uuid]])).any?
+ ['tail_uuid','=',@object.uuid]]).with_count("none")).any?
objects.each do |object|
object.destroy
end
if !owner_filter.andand.any?
filters = @filters + [["owner_uuid", "=", current_user.uuid]]
- my_repos = Repository.all.order("name ASC").limit(limit).offset(offset).filter(filters).results
+ my_repos = Repository.all.order("name ASC").limit(limit).with_count("none").offset(offset).filter(filters).results
else # done fetching all owned repositories
my_repos = []
end
end
filters = @filters + [["owner_uuid", "!=", current_user.uuid]]
- other_repos = Repository.all.order("name ASC").limit(limit).offset(offset).filter(filters).results
+ other_repos = Repository.all.order("name ASC").limit(limit).with_count("none").offset(offset).filter(filters).results
@objects = (my_repos + other_repos).first(limit)
end
base_search = base_search.filter([["modified_at", "<=", last_mod_at], ["uuid", "not in", last_uuids]])
end
- base_search = base_search.include_trash(true).limit(limit).offset(offset)
+ base_search = base_search.include_trash(true).limit(limit).with_count("none").offset(offset)
if params[:filters].andand.length.andand > 0
- tags = Link.filter(params[:filters])
+ tags = Link.filter(params[:filters]).with_count("none")
tagged = []
if tags.results.length > 0
tagged = query_on.include_trash(true).where(uuid: tags.collect(&:head_uuid))
owner_uuids = @objects.collect(&:owner_uuid).uniq
@owners = {}
@not_trashed = {}
- Group.filter([["uuid", "in", owner_uuids]]).include_trash(true).each do |grp|
+ Group.filter([["uuid", "in", owner_uuids]]).with_count("none").include_trash(true).each do |grp|
@owners[grp.uuid] = grp
end
- User.filter([["uuid", "in", owner_uuids]]).include_trash(true).each do |grp|
+ User.filter([["uuid", "in", owner_uuids]]).with_count("none").include_trash(true).each do |grp|
@owners[grp.uuid] = grp
@not_trashed[grp.uuid] = true
end
- Group.filter([["uuid", "in", owner_uuids]]).select([:uuid]).each do |grp|
+ Group.filter([["uuid", "in", owner_uuids]]).with_count("none").select([:uuid]).each do |grp|
@not_trashed[grp.uuid] = true
end
else
def activity
@breadcrumb_page_name = nil
- @users = User.limit(params[:limit])
+ @users = User.limit(params[:limit]).with_count("none")
@user_activity = {}
@activity = {
logins: {},
filter([[:event_type, '=', 'login'],
[:object_kind, '=', 'arvados#user'],
[:created_at, '>=', threshold_start],
- [:created_at, '<', threshold_end]])
+ [:created_at, '<', threshold_end]]).with_count("none")
@activity[:jobs][span] = Job.select(%w(uuid modified_by_user_uuid)).
filter([[:created_at, '>=', threshold_start],
- [:created_at, '<', threshold_end]])
+ [:created_at, '<', threshold_end]]).with_count("none")
@activity[:pipeline_instances][span] = PipelineInstance.select(%w(uuid modified_by_user_uuid)).
filter([[:created_at, '>=', threshold_start],
- [:created_at, '<', threshold_end]])
+ [:created_at, '<', threshold_end]]).with_count("none")
@activity.each do |type, act|
records = act[span]
@users.each do |u|
def storage
@breadcrumb_page_name = nil
- @users = User.limit(params[:limit])
+ @users = User.limit(params[:limit]).with_count("none")
@user_storage = {}
total_storage = {}
@log_date = {}
@my_jobs = Job.
limit(10).
order('created_at desc').
+ with_count('none').
where(created_by: current_user.uuid)
@my_collections = Collection.
limit(10).
order('created_at desc').
+ with_count('none').
where(created_by: current_user.uuid)
collection_uuids = @my_collections.collect &:uuid
end
Link.filter([['head_uuid', 'in', collection_uuids],
- ['link_class', 'in', ['tag', 'resources']]]).
+ ['link_class', 'in', ['tag', 'resources']]]).with_count("none").
each do |link|
case link.link_class
when 'tag'
@my_pipelines = PipelineInstance.
limit(10).
order('created_at desc').
+ with_count('none').
where(created_by: current_user.uuid)
respond_to do |f|
@my_vm_logins = {}
Link.where(tail_uuid: @object.uuid,
link_class: 'permission',
- name: 'can_login').
+ name: 'can_login').with_count("none").
each do |perm_link|
if perm_link.properties.andand[:username]
@my_vm_logins[perm_link.head_uuid] ||= []
@my_vm_logins[perm_link.head_uuid] << perm_link.properties[:username]
end
end
- @my_virtual_machines = VirtualMachine.where(uuid: @my_vm_logins.keys)
+ @my_virtual_machines = VirtualMachine.where(uuid: @my_vm_logins.keys).with_count("none")
end
def ssh_keys
oid_login_perms = Link.where(tail_uuid: user.email,
head_kind: 'arvados#user',
link_class: 'permission',
- name: 'can_login')
+ name: 'can_login').with_count("none")
if oid_login_perms.any?
prefix_properties = oid_login_perms.first.properties
repo_perms = Link.where(tail_uuid: user.uuid,
head_kind: 'arvados#repository',
link_class: 'permission',
- name: 'can_write')
+ name: 'can_write').with_count("none")
if repo_perms.any?
repo_uuid = repo_perms.first.head_uuid
- repos = Repository.where(head_uuid: repo_uuid)
+ repos = Repository.where(head_uuid: repo_uuid).with_count("none")
if repos.any?
repo_name = repos.first.name
current_selections[:repo_name] = repo_name
vm_login_perms = Link.where(tail_uuid: user.uuid,
head_kind: 'arvados#virtualMachine',
link_class: 'permission',
- name: 'can_login')
+ name: 'can_login').with_count("none")
if vm_login_perms.any?
vm_perm = vm_login_perms.first
vm_uuid = vm_perm.head_uuid
Link.where(tail_uuid: current_user.uuid,
head_uuid: @objects.collect(&:uuid),
link_class: 'permission',
- name: 'can_login').
+ name: 'can_login').with_count("none").
each do |perm_link|
if perm_link.properties.andand[:username]
@vm_logins[perm_link.head_uuid] ||= []
# get next page of pipeline_templates
if PipelineTemplate.api_exists?(:index)
filters = @filters + [["uuid", "is_a", ["arvados#pipelineTemplate"]]]
- pipelines = PipelineTemplate.limit(@limit).order(["created_at desc"]).filter(filters)
+ pipelines = PipelineTemplate.limit(@limit).with_count("none").order(["created_at desc"]).filter(filters)
end
# get next page of workflows
filters = @filters + [["uuid", "is_a", ["arvados#workflow"]]]
- workflows = Workflow.limit(@limit).order(["created_at desc"]).filter(filters)
+ workflows = Workflow.limit(@limit).order(["created_at desc"]).with_count("none").filter(filters)
@objects = (pipelines.to_a + workflows.to_a).sort_by(&:created_at).reverse.first(@limit)
# get next page of pipeline_instances
if PipelineInstance.api_exists?(:index)
filters = @filters + [["uuid", "is_a", ["arvados#pipelineInstance"]]]
- pipelines = PipelineInstance.limit(@limit).order(["created_at desc"]).filter(filters)
+ pipelines = PipelineInstance.limit(@limit).order(["created_at desc"]).filter(filters).with_count("none")
end
if params[:show_children]
# get next page of jobs
if Job.api_exists?(:index)
filters = @filters + [["uuid", "is_a", ["arvados#job"]]]
- jobs = Job.limit(@limit).order(["created_at desc"]).filter(filters)
+ jobs = Job.limit(@limit).order(["created_at desc"]).filter(filters).with_count("none")
end
end
if !params[:show_children]
filters << ["requesting_container_uuid", "=", nil]
end
- crs = ContainerRequest.limit(@limit).order(["created_at desc"]).filter(filters)
+ crs = ContainerRequest.limit(@limit).order(["created_at desc"]).filter(filters).with_count("none")
@objects = (jobs.to_a + pipelines.to_a + crs.to_a).sort_by(&:created_at).reverse.first(@limit)
if @objects.any?
c[:job][:uuid] if c.is_a?(Hash) and c[:job].is_a?(Hash)
}.compact
job = {}
- Job.where(uuid: jobuuids).each do |j|
+ Job.where(uuid: jobuuids).with_count("none").each do |j|
job[j[:uuid]] = j
end
container_uuid = if @proxied.is_a?(Container) then uuid else get(:container_uuid) end
if container_uuid
cols = ContainerRequest.columns.map(&:name) - %w(id updated_at mounts secret_mounts runtime_token)
- my_children = @child_proxies || ContainerRequest.select(cols).where(requesting_container_uuid: container_uuid).results if !my_children
+ my_children = @child_proxies || ContainerRequest.select(cols).where(requesting_container_uuid: container_uuid).with_count("none").results if !my_children
my_child_containers = my_children.map(&:container_uuid).compact.uniq
grandchildren = {}
my_child_containers.each { |c| grandchildren[c] = []} if my_child_containers.any?
- reqs = ContainerRequest.select(cols).where(requesting_container_uuid: my_child_containers).results if my_child_containers.any?
+ reqs = ContainerRequest.select(cols).where(requesting_container_uuid: my_child_containers).with_count("none").results if my_child_containers.any?
reqs.each {|cr| grandchildren[cr.requesting_container_uuid] << cr} if reqs
my_children.each do |cr|
Keep::Locator.parse(loc_s)
end
if log_pdhs.any? and
- Collection.where(portable_data_hash: log_pdhs).limit(1).results.any?
+ Collection.where(portable_data_hash: log_pdhs).limit(1).with_count("none").results.any?
true
elsif log_uuids.any? and
- Collection.where(uuid: log_uuids).limit(1).results.any?
+ Collection.where(uuid: log_uuids).limit(1).with_count("none").results.any?
true
else
stderr_log_query(1).results.any?
items = []
jobs = {}
- results = Job.where(uuid: @proxied.job_ids.values).results
+ results = Job.where(uuid: @proxied.job_ids.values).with_count("none").results
results.each do |j|
jobs[j.uuid] = j
end
case "$TARGET" in
centos*)
- fpm_depends+=(git arvados-server)
+ fpm_depends+=(git)
;;
debian* | ubuntu*)
- fpm_depends+=(git g++ arvados-server)
+ fpm_depends+=(git g++)
;;
esac
].each do |token, user, invited, has_profile|
test "visit home page for user #{token}" do
+ Rails.configuration.Users.AnonymousUserToken = ""
if !token
visit ('/')
else
test "no SSH public key notification when shell_in_a_box_url is configured" do
Rails.configuration.Services.WebShell.ExternalURL = URI('http://example.com')
+ Rails.configuration.Users.AnonymousUserToken = ""
visit page_with_token('job_reader')
click_link 'notifications-menu'
assert_no_selector 'a', text:'Click here to set up an SSH public key for use with Arvados.'
['job_reader2', false],
].each do |user, readable|
test "view job with components as #{user} user" do
+ Rails.configuration.Users.AnonymousUserToken = ""
job = api_fixture('jobs')['running_job_with_components']
component1 = api_fixture('jobs')['completed_job_in_publicly_accessible_project']
component2 = api_fixture('pipeline_instances')['running_pipeline_with_complete_job']
RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
# The version of setuptools that comes with CentOS is way too old
RUN pip install --upgrade setuptools
RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
ENV WORKSPACE /arvados
CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "debian9"]
RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
ENV WORKSPACE /arvados
CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "ubuntu1604"]
RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle
-# Workbench depends on arvados-server for config manipulation
-ENV GOPATH /tmp
-RUN mkdir -p $GOPATH/src/git.curoverse.com && ln -sT /tmp/arvados $GOPATH/src/git.curoverse.com/arvados.git && cd $GOPATH/src/git.curoverse.com/arvados.git/cmd/arvados-server && go get -v github.com/kardianos/govendor && $GOPATH/bin/govendor sync && go get && go build && cp arvados-server /usr/local/bin/ && rm -rf /tmp/arvados
-
ENV WORKSPACE /arvados
CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "ubuntu1804"]
set +e
mv -f ${WORKSPACE}/packages/${TARGET}/* ${WORKSPACE}/packages/${TARGET}/processed/ 2>/dev/null
set -e
-set -x
# Build packages.
if docker run \
--rm \
fpm_build_virtualenv "cwltest" "cwltest"
rm -rf "$WORKSPACE/cwltest"
+calculate_go_package_version arvados_server_version cmd/arvados-server
+arvados_server_iteration=$(default_iteration "arvados-server" "$arvados_server_version" "go")
+
# Build the API server package
test_rails_package_presence arvados-api-server "$WORKSPACE/services/api"
if [[ "$?" == "0" ]]; then
handle_rails_package arvados-api-server "$WORKSPACE/services/api" \
"$WORKSPACE/agpl-3.0.txt" --url="https://arvados.org" \
--description="Arvados API server - Arvados is a free and open source platform for big data science." \
- --license="GNU Affero General Public License, version 3.0"
+ --license="GNU Affero General Public License, version 3.0" --depends "arvados-server = ${arvados_server_version}-${arvados_server_iteration}"
fi
# Build the workbench server package
if [[ "$?" == "0" ]] ; then
(
set -e
+
+ # The workbench package has a build-time dependency on the arvados-server
+ # package for config manipulation, so install it first.
+ cd $WORKSPACE/cmd/arvados-server
+ get_complete_package_name arvados_server_pkgname arvados-server ${arvados_server_version} go
+
+ arvados_server_pkg_path="$WORKSPACE/packages/$TARGET/${arvados_server_pkgname}"
+ if [[ ! -e ${arvados_server_pkg_path} ]]; then
+ arvados_server_pkg_path="$WORKSPACE/packages/$TARGET/processed/${arvados_server_pkgname}"
+ fi
+ if [[ "$FORMAT" == "deb" ]]; then
+ dpkg -i ${arvados_server_pkg_path}
+ else
+ rpm -i ${arvados_server_pkg_path}
+ fi
+
cd "$WORKSPACE/apps/workbench"
# We need to bundle to be ready even when we build a package without vendor directory
mv /tmp/x /etc/arvados/config.yml
perl -p -i -e 'BEGIN{undef $/;} s/WebDAV(.*?):\n( *)ExternalURL: ""/WebDAV$1:\n$2ExternalURL: "example.com"/g' /etc/arvados/config.yml
- RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake npm:install >/dev/null
- RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake assets:precompile >/dev/null
+ RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake npm:install >"$STDOUT_IF_DEBUG"
+ RAILS_ENV=production RAILS_GROUPS=assets bundle exec rake assets:precompile >"$STDOUT_IF_DEBUG"
# Remove generated configuration files so they don't go in the package.
rm -rf /etc/arvados/
handle_rails_package arvados-workbench "$WORKSPACE/apps/workbench" \
"$WORKSPACE/agpl-3.0.txt" --url="https://arvados.org" \
--description="Arvados Workbench - Arvados is a free and open source platform for big data science." \
- --license="GNU Affero General Public License, version 3.0"
+ --license="GNU Affero General Public License, version 3.0" --depends "arvados-server = ${arvados_server_version}-${arvados_server_iteration}"
fi
fi
--upload
If the build and test steps are successful, upload the packages
to a remote apt repository (default: false)
+--debug
+ Output debug information (default: false)
--rc
Optional Parameter to build Release Candidate
--build-version <version>
fi
PARSEDOPTS=$(getopt --name "$0" --longoptions \
- help,upload,rc,target:,build-version: \
+ help,debug,upload,rc,target:,build-version: \
-- "" "$@")
if [ $? -ne 0 ]; then
exit 1
TARGET=debian9
UPLOAD=0
RC=0
+DEBUG=
declare -a build_args=()
--target)
TARGET="$2"; shift
;;
+ --debug)
+ DEBUG=" --debug"
+ ;;
--upload)
UPLOAD=1
;;
title "Start build packages"
timer_reset
-$WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}"
+$WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}"$DEBUG
checkexit $? "build packages"
title "End of build packages (`timer`)"
timer_reset
if [ ${#failures[@]} -eq 0 ]; then
- $WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}" --test-packages
+ $WORKSPACE/build/run-build-packages-one-target.sh "${build_args[@]}" --test-packages$DEBUG
else
echo "Skipping package upload, there were errors building the packages"
fi
fi
}
+calculate_go_package_version() {
+ # $__returnvar has the nameref attribute set, which means it is a reference
+ # to another variable that is passed in as the first argument to this function.
+ # see https://www.gnu.org/software/bash/manual/html_node/Shell-Parameters.html
+ local -n __returnvar="$1"; shift
+ local src_path="$1"; shift
+
+ mkdir -p "$GOPATH/src/git.curoverse.com"
+ ln -sfn "$WORKSPACE" "$GOPATH/src/git.curoverse.com/arvados.git"
+ (cd "$GOPATH/src/git.curoverse.com/arvados.git" && "$GOPATH/bin/govendor" sync -v)
+
+ cd "$GOPATH/src/git.curoverse.com/arvados.git/$src_path"
+ local version="$(version_from_git)"
+ local timestamp="$(timestamp_from_git)"
+
+ # Update the version number and build a new package if the vendor
+ # bundle has changed, or the command imports anything from the
+ # Arvados SDK and the SDK has changed.
+ declare -a checkdirs=(vendor)
+ if grep -qr git.curoverse.com/arvados .; then
+ checkdirs+=(sdk/go lib)
+ fi
+ for dir in ${checkdirs[@]}; do
+ cd "$GOPATH/src/git.curoverse.com/arvados.git/$dir"
+ ts="$(timestamp_from_git)"
+ if [[ "$ts" -gt "$timestamp" ]]; then
+ version=$(version_from_git)
+ timestamp="$ts"
+ fi
+ done
+
+ __returnvar="$version"
+}
+
# Usage: package_go_binary services/foo arvados-foo "Compute foo to arbitrary precision"
package_go_binary() {
local src_path="$1"; shift
local description="$1"; shift
local license_file="${1:-agpl-3.0.txt}"; shift
- if [[ -n "$ONLY_BUILD" ]] && [[ "$prog" != "$ONLY_BUILD" ]] ; then
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$prog" != "$ONLY_BUILD" ]]; then
+ # arvados-workbench depends on arvados-server at build time, so even when
+ # only arvados-workbench is being built, we need to build arvados-server too
+ if [[ "$prog" != "arvados-server" ]] || [[ "$ONLY_BUILD" != "arvados-workbench" ]]; then
return 0
+ fi
fi
debug_echo "package_go_binary $src_path as $prog"
local basename="${src_path##*/}"
-
- mkdir -p "$GOPATH/src/git.curoverse.com"
- ln -sfn "$WORKSPACE" "$GOPATH/src/git.curoverse.com/arvados.git"
- (cd "$GOPATH/src/git.curoverse.com/arvados.git" && "$GOPATH/bin/govendor" sync -v)
-
- cd "$GOPATH/src/git.curoverse.com/arvados.git/$src_path"
- local version="$(version_from_git)"
- local timestamp="$(timestamp_from_git)"
-
- # Update the version number and build a new package if the vendor
- # bundle has changed, or the command imports anything from the
- # Arvados SDK and the SDK has changed.
- declare -a checkdirs=(vendor)
- if grep -qr git.curoverse.com/arvados .; then
- checkdirs+=(sdk/go lib)
- fi
- for dir in ${checkdirs[@]}; do
- cd "$GOPATH/src/git.curoverse.com/arvados.git/$dir"
- ts="$(timestamp_from_git)"
- if [[ "$ts" -gt "$timestamp" ]]; then
- version=$(version_from_git)
- timestamp="$ts"
- fi
- done
+ calculate_go_package_version go_package_version $src_path
cd $WORKSPACE/packages/$TARGET
- test_package_presence $prog $version go
+ test_package_presence $prog $go_package_version go
if [[ "$?" != "0" ]]; then
return 1
fi
- go get -ldflags "-X main.version=${version}" "git.curoverse.com/arvados.git/$src_path"
+ go get -ldflags "-X main.version=${go_package_version}" "git.curoverse.com/arvados.git/$src_path"
local -a switches=()
systemd_unit="$WORKSPACE/${src_path}/${prog}.service"
fi
switches+=("$WORKSPACE/${license_file}=/usr/share/doc/$prog/${license_file}")
- fpm_build "$GOPATH/bin/${basename}=/usr/bin/${prog}" "${prog}" dir "${version}" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=${description}" "${switches[@]}"
+ fpm_build "$GOPATH/bin/${basename}=/usr/bin/${prog}" "${prog}" dir "${go_package_version}" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=${description}" "${switches[@]}"
}
default_iteration() {
test_package_presence $pkgname $version rails "$RAILS_PACKAGE_ITERATION"
}
-test_package_presence() {
- local pkgname="$1"; shift
- local version="$1"; shift
- local pkgtype="$1"; shift
- local iteration="$1"; shift
- local arch="$1"; shift
+get_complete_package_name() {
+ # if the errexit flag is set, unset it until this function returns
+ # otherwise, the shift calls below will abort the program if optional arguments are not supplied
+ if [ -o errexit ]; then
+ set +e
+ trap 'set -e' RETURN
+ fi
+ # $__returnvar has the nameref attribute set, which means it is a reference
+ # to another variable that is passed in as the first argument to this function.
+ # see https://www.gnu.org/software/bash/manual/html_node/Shell-Parameters.html
+ local -n __returnvar="$1"; shift
+ local pkgname="$1"; shift
+ local version="$1"; shift
+ local pkgtype="$1"; shift
+ local iteration="$1"; shift
+ local arch="$1"; shift
+ if [[ "$iteration" == "" ]]; then
+ iteration="$(default_iteration "$pkgname" "$version" "$pkgtype")"
+ fi
- if [[ -n "$ONLY_BUILD" ]] && [[ "$pkgname" != "$ONLY_BUILD" ]] ; then
- return 1
- fi
+ if [[ "$arch" == "" ]]; then
+ rpm_architecture="x86_64"
+ deb_architecture="amd64"
- if [[ "$iteration" == "" ]]; then
- iteration="$(default_iteration "$pkgname" "$version" "$pkgtype")"
+ if [[ "$pkgtype" =~ ^(src)$ ]]; then
+ rpm_architecture="noarch"
+ deb_architecture="all"
fi
- if [[ "$arch" == "" ]]; then
+ # These python packages have binary components
+ if [[ "$pkgname" =~ (ruamel|ciso|pycrypto|pyyaml) ]]; then
rpm_architecture="x86_64"
deb_architecture="amd64"
+ fi
+ else
+ rpm_architecture=$arch
+ deb_architecture=$arch
+ fi
- if [[ "$pkgtype" =~ ^(src)$ ]]; then
- rpm_architecture="noarch"
- deb_architecture="all"
- fi
+ local complete_pkgname="${pkgname}_$version${iteration:+-$iteration}_$deb_architecture.deb"
+ if [[ "$FORMAT" == "rpm" ]]; then
+ # rpm packages get iteration 1 if we don't supply one
+ iteration=${iteration:-1}
+ complete_pkgname="$pkgname-$version-${iteration}.$rpm_architecture.rpm"
+ fi
+ __returnvar=${complete_pkgname}
+}
- # These python packages have binary components
- if [[ "$pkgname" =~ (ruamel|ciso|pycrypto|pyyaml) ]]; then
- rpm_architecture="x86_64"
- deb_architecture="amd64"
+# Test if the package already exists: if it does not, return 0 (build it); if it does, return 1
+test_package_presence() {
+ local pkgname="$1"; shift
+ local version="$1"; shift
+ local pkgtype="$1"; shift
+ local iteration="$1"; shift
+ local arch="$1"; shift
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$pkgname" != "$ONLY_BUILD" ]] ; then
+ # arvados-workbench depends on arvados-server at build time, so even when
+ # only arvados-workbench is being built, we need to build arvados-server too
+ if [[ "$pkgname" != "arvados-server" ]] || [[ "$ONLY_BUILD" != "arvados-workbench" ]]; then
+ return 1
fi
- else
- rpm_architecture=$arch
- deb_architecture=$arch
fi
- if [[ "$FORMAT" == "deb" ]]; then
- local complete_pkgname="${pkgname}_$version${iteration:+-$iteration}_$deb_architecture.deb"
- else
- # rpm packages get iteration 1 if we don't supply one
- iteration=${iteration:-1}
- local complete_pkgname="$pkgname-$version-${iteration}.$rpm_architecture.rpm"
- fi
+ local full_pkgname
+ get_complete_package_name full_pkgname $pkgname $version $pkgtype $iteration $arch
# See if we can skip building the package, only if it already exists in the
# processed/ directory. If so, move it back to the packages directory to make
fi
repo_pkg_list=$(curl -s -o - http://apt.arvados.org/pool/${D}/main/${repo_subdir}/)
- echo ${repo_pkg_list} |grep -q ${complete_pkgname}
+ echo ${repo_pkg_list} |grep -q ${full_pkgname}
if [ $? -eq 0 ] ; then
- echo "Package $complete_pkgname exists, not rebuilding!"
- curl -s -o ./${complete_pkgname} http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${complete_pkgname}
+ echo "Package $full_pkgname exists upstream, not rebuilding, downloading instead!"
+ curl -s -o "$WORKSPACE/packages/$TARGET/${full_pkgname}" http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${full_pkgname}
return 1
- elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
- echo "Package $complete_pkgname exists, not rebuilding!"
+ elif test -f "$WORKSPACE/packages/$TARGET/processed/${full_pkgname}" ; then
+ echo "Package $full_pkgname exists, not rebuilding!"
return 1
else
- echo "Package $complete_pkgname not found, building"
+ echo "Package $full_pkgname not found, building"
return 0
fi
else
centos_repo="http://rpm.arvados.org/CentOS/7/dev/x86_64/"
repo_pkg_list=$(curl -s -o - ${centos_repo})
- echo ${repo_pkg_list} |grep -q ${complete_pkgname}
+ echo ${repo_pkg_list} |grep -q ${full_pkgname}
if [ $? -eq 0 ]; then
- echo "Package $complete_pkgname exists, not rebuilding!"
- curl -s -o ./${complete_pkgname} ${centos_repo}${complete_pkgname}
+ echo "Package $full_pkgname exists upstream, not rebuilding, downloading instead!"
+ curl -s -o "$WORKSPACE/packages/$TARGET/${full_pkgname}" ${centos_repo}${full_pkgname}
return 1
- elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
- echo "Package $complete_pkgname exists, not rebuilding!"
+ elif test -f "$WORKSPACE/packages/$TARGET/processed/${full_pkgname}" ; then
+ echo "Package $full_pkgname exists, not rebuilding!"
return 1
else
- echo "Package $complete_pkgname not found, building"
+ echo "Package $full_pkgname not found, building"
return 0
fi
fi
shift
if [[ -n "$ONLY_BUILD" ]] && [[ "$PACKAGE_NAME" != "$ONLY_BUILD" ]] && [[ "$PACKAGE" != "$ONLY_BUILD" ]] ; then
+ # arvados-workbench depends on arvados-server at build time, so even when
+ # only arvados-workbench is being built, we need to build arvados-server too
+ if [[ "$PACKAGE_NAME" != "arvados-server" ]] || [[ "$ONLY_BUILD" != "arvados-workbench" ]]; then
return 0
+ fi
fi
local default_iteration_value="$(default_iteration "$PACKAGE" "$VERSION" "$PACKAGE_TYPE")"
Currently only reads @RemoteClusters@ from centralized configuration. Still requires component-specific configuration file.
+h2(#keepproxy). keepproxy
+
+The legacy keepproxy config (loaded from @/etc/arvados/keepproxy/keepproxy.yml@ or a different location specified via the @-legacy-keepproxy-config@ command line argument) takes precedence over the centralized config. After you migrate everything from the legacy config to the centralized config, you should delete @/etc/arvados/keepproxy/keepproxy.yml@ and stop using the @-legacy-keepproxy-config@ argument.
+
+h2(#arv-git-httpd). arv-git-httpd
+
+The legacy arv-git-httpd config (loaded from @/etc/arvados/git-httpd/git-httpd.yml@ or a different location specified via the @-legacy-git-httpd-config@ command line argument) takes precedence over the centralized config. After you migrate everything from the legacy config to the centralized config, you should delete @/etc/arvados/git-httpd/git-httpd.yml@ and stop using the @-legacy-git-httpd-config@ argument.
+
+
h2. arvados-controller
Already uses centralized config exclusively. No migration needed.
|"v1.1.4":#v1_1_4|"v1.1.3":#v1_1_3|"v1.1.2":#v1_1_2|"v1.1.1":#v1_1_1|"v1.1.0":#v1_1_0|
|\5. "older":#older|
-h3(#master). development master (as of 2019-08-09)
+h3(#master). development master (as of 2019-08-12)
+
+h4. Arv-git-httpd configuration migration
+
+(feature "#14712":https://dev.arvados.org/issues/14712 ) The arv-git-httpd package can now be configured using the centralized configuration file at @/etc/arvados/config.yml@. Configuration via individual command line arguments is no longer available. Please see "arv-git-httpd's config migration guide":{{site.baseurl}}/admin/config-migration.html#arv-git-httpd for more details.
+
+h4. Keep-web dropped support for configuration via command line flags
+
+As we're migrating to a central cluster configuration file, the already deprecated way of getting configurations via environment variables and command line flags isn't valid anymore. Current keep-web supports both the now legacy @keep-web.yml@ config format (used by Arvados 1.4) and the new cluster config file format. Please check "keep-web's install guide":{{site.baseurl}}/install/install-keep-web.html for more details.
h4. Jobs API is read-only
So that older Arvados sites don't lose access to legacy records, the API has been converted to read-only. Creating and updating jobs (and related types job_task, pipeline_template and pipeline_instance) is disabled and much of the business logic related has been removed, along with various other code specific to the jobs API. Specifically, the following programs associated with the jobs API have been removed: @crunch-dispatch.rb@, @crunch-job@, @crunchrunner@, @arv-run-pipeline-instance@, @arv-run@.
+h4. Keepproxy configuration migration
+
+(feature "#14715":https://dev.arvados.org/issues/14715 ) Keepproxy can now be configured using the centralized config at @/etc/arvados/config.yml@. Configuration via individual command line arguments is no longer available and the @DisableGet@, @DisablePut@, and @PIDFile@ configuration options are no longer supported. If you are still using the legacy config and @DisableGet@ or @DisablePut@ are set to true or @PIDFile@ has a value, keepproxy will produce an error and fail to start. Please see "keepproxy's config migration guide":{{site.baseurl}}/admin/config-migration.html#keepproxy for more details.
+
h4. No longer stripping ':' from strings in serialized database columns
(bug "#15311":https://dev.arvados.org/issues/15311 ) Strings read from serialized columns in the database with a leading ':' would have the ':' stripped after loading the record. This behavior existed due to legacy serialization behavior which stored Ruby symbols with a leading ':'. Unfortunately this corrupted fields where the leading ":" was intentional. This behavior has been removed.
h3. Configure the API server to advertise the correct SSH URLs
-In your API server's @application.yml@ file, add the following entry:
+Edit the cluster config at @/etc/arvados/config.yml@ and set @Services.GitSSH.ExternalURL@. Replace @uuid_prefix@ with your cluster id.
<notextile>
-<pre><code>git_repo_ssh_base: "git@git.<span class="userinput">uuid_prefix.your.domain</span>:"
+<pre><code>Clusters:
+ <span class="userinput">uuid_prefix</span>:
+ Services:
+ GitSSH:
+ ExternalURL: <span class="userinput">git@git.uuid_prefix.your.domain:</span>
</code></pre>
</notextile>
{% include 'notebox_end' %}
-Create the configuration file @/etc/arvados/git-httpd/git-httpd.yml@. Run @arvados-git-httpd -h@ to learn more about configuration entries.
+Edit the cluster config at @/etc/arvados/config.yml@ and set the following values. Replace @uuid_prefix@ with your cluster id.
<notextile>
-<pre><code>Client:
- APIHost: <b>uuid_prefix.your.domain</b>
- Insecure: false
-GitCommand: /var/lib/arvados/git/gitolite/src/gitolite-shell
-GitoliteHome: /var/lib/arvados/git
-Listen: :9001
-RepoRoot: /var/lib/arvados/git/repositories
+<pre><code>Clusters:
+ <span class="userinput">uuid_prefix</span>:
+ Services:
+ GitHTTP:
+ ExternalURL: <span class="userinput">https://git.uuid_prefix.your.domain/</span>
+ InternalURLs:
+ <span class="userinput">"http://localhost:9001": {}</span>
+ Git:
+ GitCommand: <span class="userinput">/var/lib/arvados/git/gitolite/src/gitolite-shell</span>
+ GitoliteHome: <span class="userinput">/var/lib/arvados/git</span>
+ Repositories: <span class="userinput">/var/lib/arvados/git/repositories</span>
</code></pre>
</notextile>
+Make sure to include the trailing slash for @Services.GitHTTP.ExternalURL@.
+
Restart the systemd service to ensure the new configuration is used.
+
<notextile>
<pre><code>~$ <span class="userinput">sudo systemctl restart arvados-git-httpd</span>
</code></pre>
</code></pre>
</notextile>
-h3. Configure the API server to advertise the correct HTTPS URLs
-
-In your API server's @application.yml@ file, add the following entry:
-
-<notextile>
-<pre><code>git_repo_https_base: https://git.<span class="userinput">uuid_prefix.your.domain</span>/
-</code></pre>
-</notextile>
-
-Make sure to include the trailing slash.
-
h2. Restart Nginx
Restart Nginx to make the Nginx and API server configuration changes take effect.
<notextile>
<pre><code>~$ <span class="userinput">keep-web -h</span>
Usage of keep-web:
- -allow-anonymous
- Serve public data to anonymous clients. Try the token supplied in the ARVADOS_API_TOKEN environment variable when none of the tokens provided in an HTTP request succeed in reading the desired collection. (default false)
- -attachment-only-host string
- Accept credentials, and add "Content-Disposition: attachment" response headers, for requests at this hostname:port. Prohibiting inline display makes it possible to serve untrusted and non-public content from a single origin, i.e., without wildcard DNS or TLS.
- -listen string
- Address to listen on: "host:port", or ":port" to listen on all interfaces. (default ":80")
- -trust-all-content
- Serve non-public content from a single origin. Dangerous: read docs before using!
+ -config file
+ Site configuration file (default may be overridden by setting an ARVADOS_CONFIG environment variable) (default "/etc/arvados/config.yml")
+ -dump-config
+ write current configuration to stdout and exit
+[...]
+ -version
+ print version information and exit.
</code></pre>
</notextile>
-{% assign railscmd = "bundle exec ./script/get_anonymous_user_token.rb --get" %}
-{% assign railsout = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" %}
-If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another. {% include 'install_rails_command' %}
-
-Install runit to supervise the Keep-web daemon. {% include 'install_runit' %}
-
-The basic command to start Keep-web in the service run script is:
-
-<notextile>
-<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-export ARVADOS_API_TOKEN="<span class="userinput">{{railsout}}</span>"
-exec sudo -u nobody keep-web \
- -listen=<span class="userinput">:9002</span> \
- -attachment-only-host=<span class="userinput">download.uuid_prefix.your.domain</span> \
- -allow-anonymous \
- 2>&1
-</code></pre>
-</notextile>
-
-Omit the @-allow-anonymous@ argument if you do not want to serve public data.
-
-Set @ARVADOS_API_HOST_INSECURE=1@ if your API server's TLS certificate is not signed by a recognized CA.
-
h3. Set up a reverse proxy with TLS support
The Keep-web service will be accessible from anywhere on the internet, so we recommend using TLS for transport encryption.
If neither of the above wildcard options is feasible, you have two choices:
# Serve web content at @collections.uuid_prefix.your.domain@, but only for unauthenticated requests (public data and collection sharing links). Authenticated requests will always result in file downloads, using the @download@ name. For example, the Workbench "preview" button and the "view entire log file" link will invoke file downloads instead of displaying content in the browser window.
-# In the special case where you know you are immune to XSS exploits, you can enable the "trust all content" mode in Keep-web (with the @-trust-all-content@ command line flag) and Workbench (with the @trust_all_content@ item in @application.yml@). With both of these enabled, inline web content can be served from a single @collections@ host name; no wildcard DNS or certificate is needed. Do not do this without understanding the security implications described in the "Keep-web documentation":http://godoc.org/github.com/curoverse/arvados/services/keep-web.
+# In the special case where you know you are immune to XSS exploits, you can enable the "trust all content" mode in Keep-web and Workbench (setting @Collections.TrustAllContent: true@ on the config file). With this enabled, inline web content can be served from a single @collections@ host name; no wildcard DNS or certificate is needed. Do not do this without understanding the security implications described in the "Keep-web documentation":http://godoc.org/github.com/curoverse/arvados/services/keep-web.
-h3. Tell Workbench about the Keep-web service
+h2. Configure Keep-web
+
+{% assign railscmd = "bundle exec ./script/get_anonymous_user_token.rb --get" %}
+{% assign railsout = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" %}
+If you intend to use Keep-web to serve public data to anonymous clients, configure it with an anonymous token. You can use the same one you used when you set up your Keepproxy server, or use the following command on the <strong>API server</strong> to create another. {% include 'install_rails_command' %}
+
+Set the cluster config file like the following:
+
+<notextile>
+<pre><code>Clusters:
+ <span class="userinput">uuid_prefix</span>:
+ Services:
+ Controller:
+ ExternalURL: "https://<span class="userinput">uuid_prefix</span>.your.domain"
+ WebDAV:
+ InternalURLs:
+ "http://keep_web_hostname_goes_here:9002/": {}
+ ExternalURL: "https://collections.<span class="userinput">uuid_prefix</span>.your.domain"
+ WebDAVDownload:
+ InternalURLs:
+ "http://keep_web_hostname_goes_here:9002/": {}
+ ExternalURL: "https://download.<span class="userinput">uuid_prefix</span>.your.domain"
+ Users:
+ AnonymousUserToken: "{{railsout}}"
+ Collections:
+ TrustAllContent: false
+ TLS:
+ Insecure: false
+</code></pre>
+</notextile>
+
+Set @Users.AnonymousUserToken: ""@ (empty string) if you do not want to serve public data.
+
+Set @TLS.Insecure: true@ if your API server's TLS certificate is not signed by a recognized CA.
Workbench has features like "download file from collection" and "show image" which work better if the content is served by Keep-web rather than Workbench itself. We recommend using the two different hostnames ("download" and "collections" above) for file downloads and inline content respectively.
-Add the following entry to your Workbench configuration file (@/etc/arvados/workbench/application.yml@). This URL will be used for file downloads.
+The following entry in your cluster configuration file (@/etc/arvados/config.yml@) details the URL that will be used for file downloads.
<notextile>
-<pre><code>keep_web_download_url: https://download.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+<pre><code>Clusters:
+ <span class="userinput">uuid_prefix</span>:
+ Services:
+ WebDAVDownload:
+ ExternalURL: "https://download.<span class="userinput">uuid_prefix</span>.your.domain"
</code></pre>
</notextile>
-Additionally, add *one* of the following entries to your Workbench configuration file, depending on your DNS setup. This URL will be used to serve user content that can be displayed in the browser, like image previews and static HTML pages.
+Additionally, one of the following entries in your cluster configuration file (depending on your DNS setup) tells Workbench which URL will be used to serve user content that can be displayed in the browser, like image previews and static HTML pages.
+
+<notextile>
+<pre><code>Clusters:
+ <span class="userinput">uuid_prefix</span>:
+ Services:
+ WebDAV:
+ ExternalURL: "https://*--collections.<span class="userinput">uuid_prefix</span>.your.domain"
+ ExternalURL: "https://*.collections.<span class="userinput">uuid_prefix</span>.your.domain"
+ ExternalURL: "https://collections.<span class="userinput">uuid_prefix</span>.your.domain"
+</code></pre>
+</notextile>
+
+h2. Run Keep-web
+
+h3. Start the service (option 1: systemd)
+
+If your system does not use systemd, skip this section and follow the "runit instructions":#runit instead.
+
+If your system uses systemd, the keep-web service should already be set up. Start it and check its status:
<notextile>
-<pre><code>keep_web_url: https://%{uuid_or_pdh}--collections.<span class="userinput">uuid_prefix</span>.your.domain
-keep_web_url: https://%{uuid_or_pdh}.collections.<span class="userinput">uuid_prefix</span>.your.domain
-keep_web_url: https://collections.<span class="userinput">uuid_prefix</span>.your.domain/c=%{uuid_or_pdh}
+<pre><code>~$ <span class="userinput">sudo systemctl restart keep-web</span>
+~$ <span class="userinput">sudo systemctl status keep-web</span>
+● keep-web.service - Arvados Keep web gateway
+ Loaded: loaded (/lib/systemd/system/keep-web.service; enabled)
+ Active: active (running) since Sat 2019-08-10 10:33:21 UTC; 3 days ago
+ Docs: https://doc.arvados.org/
+ Main PID: 4242 (keep-web)
+ CGroup: /system.slice/keep-web.service
+ └─4242 /usr/bin/keep-web
+[...]
</code></pre>
</notextile>
+
+h3(#runit). Start the service (option 2: runit)
+
+Install runit to supervise the Keep-web daemon. {% include 'install_runit' %}
+
+The basic command to start Keep-web in the service run script is:
+
+<notextile>
+<pre><code>exec keep-web
+</code></pre>
+</notextile>
+
<notextile>
<pre><code>~$ <span class="userinput">keepproxy -h</span>
-...
-Usage: keepproxy [-config path/to/keepproxy.yml]
-...
+Usage of keepproxy:
+ -config file
+ Site configuration file (default may be overridden by setting an ARVADOS_CONFIG environment variable) (default "/etc/arvados/config.yml")
+ -dump-config
+ write current configuration to stdout and exit
+[...]
+ -version
+ print version information and exit.
</code></pre>
</notextile>
-h3. Create an API token for the Keepproxy server
-
-{% assign railscmd = "bundle exec ./script/get_anonymous_user_token.rb --get" %}
-{% assign railsout = "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz" %}
-The Keepproxy server needs a token to talk to the API server. On the <strong>API server</strong>, use the following command to create the token. {% include 'install_rails_command' %}
-
-h3. Set up the Keepproxy service
+h3. Update the cluster config
-Install runit to supervise the keepproxy daemon. {% include 'install_runit' %}
-
-The run script for the keepproxy service should set the environment variables @ARVADOS_API_TOKEN@ (with the token you just generated), @ARVADOS_API_HOST@, and, if needed, @ARVADOS_API_HOST_INSECURE@. The core keepproxy command to run is:
+Edit the cluster config at @/etc/arvados/config.yml@ and set @Services.Keepproxy.ExternalURL@ and @Services.Keepproxy.InternalURLs@. Replace @uuid_prefix@ with your cluster id.
<notextile>
-<pre><code>ARVADOS_API_TOKEN=<span class="userinput">{{railsout}}</span> ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span> exec keepproxy
-</code></pre>
+<pre><code>Clusters:
+ <span class="userinput">uuid_prefix</span>:
+ Services:
+ Keepproxy:
+ ExternalURL: <span class="userinput">https://keep.uuid_prefix.your.domain</span>
+ InternalURLs:
+ <span class="userinput">"http://localhost:25107": {}</span>
+</code></pre>
</notextile>
h3. Set up a reverse proxy with SSL support
EOF</span>
</code></pre></notextile>
+h2. Run Keepproxy
+
+h3. Start the service (option 1: systemd)
+
+If your system does not use systemd, skip this section and follow the "runit instructions":#runit instead.
+
+If your system uses systemd, the keepproxy service should already be set up. Start it and check its status:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo systemctl restart keepproxy</span>
+~$ <span class="userinput">sudo systemctl status keepproxy</span>
+● keepproxy.service - Arvados Keep Proxy
+ Loaded: loaded (/lib/systemd/system/keepproxy.service; enabled)
+ Active: active (running) since Tue 2019-07-23 09:33:47 EDT; 3 weeks 1 days ago
+ Docs: https://doc.arvados.org/
+ Main PID: 1150 (Keepproxy)
+ CGroup: /system.slice/keepproxy.service
+ └─1150 /usr/bin/keepproxy
+[...]
+</code></pre>
+</notextile>
+
+h3(#runit). Start the service (option 2: runit)
+
+Install runit to supervise the keepproxy daemon. {% include 'install_runit' %}
+
h3. Testing keepproxy
Log into a host that is on an external network from your private Arvados network. The host should be able to contact your keepproxy server (eg keep.$uuid_prefix.arvadosapi.com), but not your keepstore servers (eg keep[0-9].$uuid_prefix.arvadosapi.com).
WebsocketClientEventQueue: 64
WebsocketServerEventQueue: 4
+ # Timeout on requests to internal Keep services.
+ KeepServiceRequestTimeout: 15s
+
Users:
# Config parameters to automatically setup new users. If enabled,
# this users will be able to self-activate. Enable this if you want
# to run an open instance where anyone can create an account and use
# the system without requiring manual approval.
#
- # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
- # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+ # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+ # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
AutoSetupNewUsers: false
AutoSetupNewUsersWithVmUUID: ""
AutoSetupNewUsersWithRepository: false
syslog: {}
SAMPLE: {}
- # When new_users_are_active is set to true, new users will be active
+ # When NewUsersAreActive is set to true, new users will be active
# immediately. This skips the "self-activate" step which enforces
# user agreements. Should only be enabled for development.
NewUsersAreActive: false
# should be an address associated with a Google account.
AutoAdminUserWithEmail: ""
- # If auto_admin_first_user is set to true, the first user to log in when no
+ # If AutoAdminFirstUser is set to true, the first user to log in when no
# other admin users exist will automatically become an admin user.
AutoAdminFirstUser: false
NewUserNotificationRecipients: {}
NewInactiveUserNotificationRecipients: {}
- # Set anonymous_user_token to enable anonymous user access. You can get
+ # Set AnonymousUserToken to enable anonymous user access. You can get
# the token by running "bundle exec ./script/get_anonymous_user_token.rb"
# in the directory where your API server is running.
AnonymousUserToken: ""
# Maximum number of log rows to delete in a single SQL transaction.
#
- # If max_audit_log_delete_batch is 0, log entries will never be
+ # If MaxDeleteBatch is 0, log entries will never be
# deleted by Arvados. Cleanup can be done by an external process
# without affecting any Arvados system processes, as long as very
# recent (<5 minutes old) logs are not deleted.
# identical to the permission key given to Keep. IMPORTANT: This is
# a site secret. It should be at least 50 characters.
#
- # Modifying blob_signing_key will invalidate all existing
+ # Modifying BlobSigningKey will invalidate all existing
# signatures, which can cause programs to fail (e.g., arv-put,
# arv-get, and Crunch jobs). To avoid errors, rotate keys only when
# no such processes are running.
# keepstore servers. Otherwise, reading data blocks and saving
# collections will fail with HTTP 403 permission errors.
#
- # Modifying blob_signature_ttl invalidates existing signatures; see
- # blob_signing_key note above.
+ # Modifying BlobSigningTTL invalidates existing signatures; see
+ # BlobSigningKey note above.
#
# The default is 2 weeks.
BlobSigningTTL: 336h
# Default lifetime for ephemeral collections: 2 weeks. This must not
- # be less than blob_signature_ttl.
+ # be less than BlobSigningTTL.
DefaultTrashLifetime: 336h
# Interval (seconds) between trash sweeps. During a trash sweep,
# If true, enable collection versioning.
# When a collection's preserve_version field is true or the current version
- # is older than the amount of seconds defined on preserve_version_if_idle,
+ # is older than the amount of seconds defined on PreserveVersionIfIdle,
# a snapshot of the collection's previous state is created and linked to
# the current collection.
CollectionVersioning: false
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
+ # Cache parameters for WebDAV content serving:
+ # * TTL: Maximum time to cache manifests and permission checks.
+ # * UUIDTTL: Maximum time to cache collection state.
+ # * MaxBlockEntries: Maximum number of block cache entries.
+ # * MaxCollectionEntries: Maximum number of collection cache entries.
+ # * MaxCollectionBytes: Approximate memory limit for collection cache.
+ # * MaxPermissionEntries: Maximum number of permission cache entries.
+ # * MaxUUIDEntries: Maximum number of UUID cache entries.
+ WebDAVCache:
+ TTL: 300s
+ UUIDTTL: 5s
+ MaxBlockEntries: 4
+ MaxCollectionEntries: 1000
+ MaxCollectionBytes: 100000000
+ MaxPermissionEntries: 1000
+ MaxUUIDEntries: 1000
+
Login:
- # These settings are provided by your OAuth2 provider (e.g.,
- # sso-provider).
+ # These settings are provided by your OAuth2 provider (eg
+ # Google) used to perform upstream authentication.
ProviderAppSecret: ""
ProviderAppID: ""
+      # The cluster ID to delegate the user database to. When set,
+      # logins on this cluster will be redirected to the login cluster
+      # (the login cluster must appear in RemoteClusters with Proxy: true)
+ LoginCluster: ""
+
+ # How long a cached token belonging to a remote cluster will
+ # remain valid before it needs to be revalidated.
+ RemoteTokenRefresh: 5m
+
Git:
+ # Path to git or gitolite-shell executable. Each authenticated
+ # request will execute this program with the single argument "http-backend"
+ GitCommand: /usr/bin/git
+
+ # Path to Gitolite's home directory. If a non-empty path is given,
+ # the CGI environment will be set up to support the use of
+ # gitolite-shell as a GitCommand: for example, if GitoliteHome is
+ # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
+ # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
+ GitoliteHome: ""
+
# Git repositories must be readable by api server, or you won't be
# able to submit crunch jobs. To pass the test suites, put a clone
# of the arvados tree in {git_repositories_dir}/arvados.git or
cfg.Clusters[cluster.ClusterID] = *cluster
return nil
}
+
+type oldKeepProxyConfig struct {
+ Client *arvados.Client
+ Listen *string
+ DisableGet *bool
+ DisablePut *bool
+ DefaultReplicas *int
+ Timeout *arvados.Duration
+ PIDFile *string
+ Debug *bool
+ ManagementToken *string
+}
+
+const defaultKeepproxyConfigPath = "/etc/arvados/keepproxy/keepproxy.yml"
+
+func (ldr *Loader) loadOldKeepproxyConfig(cfg *arvados.Config) error {
+ if ldr.KeepproxyPath == "" {
+ return nil
+ }
+ var oc oldKeepProxyConfig
+ err := ldr.loadOldConfigHelper("keepproxy", ldr.KeepproxyPath, &oc)
+ if os.IsNotExist(err) && ldr.KeepproxyPath == defaultKeepproxyConfigPath {
+ return nil
+ } else if err != nil {
+ return err
+ }
+
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return err
+ }
+
+ loadOldClientConfig(cluster, oc.Client)
+
+ if oc.Listen != nil {
+ cluster.Services.Keepproxy.InternalURLs[arvados.URL{Host: *oc.Listen}] = arvados.ServiceInstance{}
+ }
+ if oc.DefaultReplicas != nil {
+ cluster.Collections.DefaultReplication = *oc.DefaultReplicas
+ }
+ if oc.Timeout != nil {
+ cluster.API.KeepServiceRequestTimeout = *oc.Timeout
+ }
+ if oc.Debug != nil {
+ if *oc.Debug && cluster.SystemLogs.LogLevel != "debug" {
+ cluster.SystemLogs.LogLevel = "debug"
+ } else if !*oc.Debug && cluster.SystemLogs.LogLevel != "info" {
+ cluster.SystemLogs.LogLevel = "info"
+ }
+ }
+ if oc.ManagementToken != nil {
+ cluster.ManagementToken = *oc.ManagementToken
+ }
+
+ // The following legacy options are no longer supported. If they are set to
+ // true or PIDFile has a value, error out and notify the user
+ unsupportedEntry := func(cfgEntry string) error {
+ return fmt.Errorf("the keepproxy %s configuration option is no longer supported, please remove it from your configuration file", cfgEntry)
+ }
+ if oc.DisableGet != nil && *oc.DisableGet {
+ return unsupportedEntry("DisableGet")
+ }
+ if oc.DisablePut != nil && *oc.DisablePut {
+ return unsupportedEntry("DisablePut")
+ }
+ if oc.PIDFile != nil && *oc.PIDFile != "" {
+ return unsupportedEntry("PIDFile")
+ }
+
+ cfg.Clusters[cluster.ClusterID] = *cluster
+ return nil
+}
+
+const defaultKeepWebConfigPath = "/etc/arvados/keep-web/keep-web.yml"
+
+type oldKeepWebConfig struct {
+ Client *arvados.Client
+
+ Listen string
+
+ AnonymousTokens []string
+ AttachmentOnlyHost string
+ TrustAllContent bool
+
+ Cache struct {
+ TTL arvados.Duration
+ UUIDTTL arvados.Duration
+ MaxCollectionEntries int
+ MaxCollectionBytes int64
+ MaxPermissionEntries int
+ MaxUUIDEntries int
+ }
+
+ // Hack to support old command line flag, which is a bool
+ // meaning "get actual token from environment".
+ deprecatedAllowAnonymous bool
+
+ // Authorization token to be included in all health check requests.
+ ManagementToken string
+}
+
+func (ldr *Loader) loadOldKeepWebConfig(cfg *arvados.Config) error {
+ if ldr.KeepWebPath == "" {
+ return nil
+ }
+ var oc oldKeepWebConfig
+ err := ldr.loadOldConfigHelper("keep-web", ldr.KeepWebPath, &oc)
+ if os.IsNotExist(err) && ldr.KeepWebPath == defaultKeepWebConfigPath {
+ return nil
+ } else if err != nil {
+ return err
+ }
+
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return err
+ }
+
+ loadOldClientConfig(cluster, oc.Client)
+
+ cluster.Services.WebDAV.InternalURLs[arvados.URL{Host: oc.Listen}] = arvados.ServiceInstance{}
+ cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: oc.Listen}] = arvados.ServiceInstance{}
+ cluster.Services.WebDAVDownload.ExternalURL = arvados.URL{Host: oc.AttachmentOnlyHost}
+ cluster.TLS.Insecure = oc.Client.Insecure
+ cluster.ManagementToken = oc.ManagementToken
+ cluster.Collections.TrustAllContent = oc.TrustAllContent
+ cluster.Collections.WebDAVCache.TTL = oc.Cache.TTL
+ cluster.Collections.WebDAVCache.UUIDTTL = oc.Cache.UUIDTTL
+ cluster.Collections.WebDAVCache.MaxCollectionEntries = oc.Cache.MaxCollectionEntries
+ cluster.Collections.WebDAVCache.MaxCollectionBytes = oc.Cache.MaxCollectionBytes
+ cluster.Collections.WebDAVCache.MaxPermissionEntries = oc.Cache.MaxPermissionEntries
+ cluster.Collections.WebDAVCache.MaxUUIDEntries = oc.Cache.MaxUUIDEntries
+ if len(oc.AnonymousTokens) > 0 {
+ cluster.Users.AnonymousUserToken = oc.AnonymousTokens[0]
+ if len(oc.AnonymousTokens) > 1 {
+ ldr.Logger.Warn("More than 1 anonymous tokens configured, using only the first and discarding the rest.")
+ }
+ }
+
+ cfg.Clusters[cluster.ClusterID] = *cluster
+ return nil
+}
+
+const defaultGitHttpdConfigPath = "/etc/arvados/git-httpd/git-httpd.yml"
+
+type oldGitHttpdConfig struct {
+ Client *arvados.Client
+ Listen string
+ GitCommand string
+ GitoliteHome string
+ RepoRoot string
+ ManagementToken string
+}
+
+func (ldr *Loader) loadOldGitHttpdConfig(cfg *arvados.Config) error {
+ if ldr.GitHttpdPath == "" {
+ return nil
+ }
+ var oc oldGitHttpdConfig
+ err := ldr.loadOldConfigHelper("arv-git-httpd", ldr.GitHttpdPath, &oc)
+ if os.IsNotExist(err) && ldr.GitHttpdPath == defaultGitHttpdConfigPath {
+ return nil
+ } else if err != nil {
+ return err
+ }
+
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return err
+ }
+
+ loadOldClientConfig(cluster, oc.Client)
+
+ cluster.Services.GitHTTP.InternalURLs[arvados.URL{Host: oc.Listen}] = arvados.ServiceInstance{}
+ cluster.TLS.Insecure = oc.Client.Insecure
+ cluster.ManagementToken = oc.ManagementToken
+ cluster.Git.GitCommand = oc.GitCommand
+ cluster.Git.GitoliteHome = oc.GitoliteHome
+ cluster.Git.Repositories = oc.RepoRoot
+
+ cfg.Clusters[cluster.ClusterID] = *cluster
+ return nil
+}
package config
import (
+ "flag"
+ "fmt"
+ "io/ioutil"
"os"
+ "time"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
check "gopkg.in/check.v1"
)
+func testLoadLegacyConfig(content []byte, mungeFlag string, c *check.C) (*arvados.Cluster, error) {
+ tmpfile, err := ioutil.TempFile("", "example")
+ if err != nil {
+ return nil, err
+ }
+ defer os.Remove(tmpfile.Name())
+
+ if _, err := tmpfile.Write(content); err != nil {
+ return nil, err
+ }
+ if err := tmpfile.Close(); err != nil {
+ return nil, err
+ }
+ flags := flag.NewFlagSet("test", flag.ExitOnError)
+ ldr := testLoader(c, "Clusters: {zzzzz: {}}", nil)
+ ldr.SetupFlags(flags)
+ args := ldr.MungeLegacyConfigArgs(ldr.Logger, []string{"-config", tmpfile.Name()}, mungeFlag)
+ flags.Parse(args)
+ cfg, err := ldr.Load()
+ if err != nil {
+ return nil, err
+ }
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return nil, err
+ }
+ return cluster, nil
+}
+
func (s *LoadSuite) TestDeprecatedNodeProfilesToServices(c *check.C) {
hostname, err := os.Hostname()
c.Assert(err, check.IsNil)
listen: ":9006"
`)
}
+
+func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
+ content := []byte(`
+{
+ "Client": {
+ "Scheme": "",
+ "APIHost": "example.com",
+ "AuthToken": "abcdefg",
+ },
+ "Listen": ":80",
+ "AnonymousTokens": [
+ "anonusertoken"
+ ],
+ "AttachmentOnlyHost": "download.example.com",
+ "TrustAllContent": true,
+ "Cache": {
+ "TTL": "1m",
+ "UUIDTTL": "1s",
+ "MaxCollectionEntries": 42,
+ "MaxCollectionBytes": 1234567890,
+ "MaxPermissionEntries": 100,
+ "MaxUUIDEntries": 100
+ },
+ "ManagementToken": "xyzzy"
+}
+`)
+ cluster, err := testLoadLegacyConfig(content, "-legacy-keepweb-config", c)
+ c.Check(err, check.IsNil)
+
+ c.Check(cluster.Services.Controller.ExternalURL, check.Equals, arvados.URL{Scheme: "https", Host: "example.com"})
+ c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
+
+ c.Check(cluster.Collections.WebDAVCache.TTL, check.Equals, arvados.Duration(60*time.Second))
+ c.Check(cluster.Collections.WebDAVCache.UUIDTTL, check.Equals, arvados.Duration(time.Second))
+ c.Check(cluster.Collections.WebDAVCache.MaxCollectionEntries, check.Equals, 42)
+ c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, int64(1234567890))
+ c.Check(cluster.Collections.WebDAVCache.MaxPermissionEntries, check.Equals, 100)
+ c.Check(cluster.Collections.WebDAVCache.MaxUUIDEntries, check.Equals, 100)
+
+ c.Check(cluster.Services.WebDAVDownload.ExternalURL, check.Equals, arvados.URL{Host: "download.example.com"})
+ c.Check(cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
+ c.Check(cluster.Services.WebDAV.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
+
+ c.Check(cluster.Collections.TrustAllContent, check.Equals, true)
+ c.Check(cluster.Users.AnonymousUserToken, check.Equals, "anonusertoken")
+ c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+}
+
+func (s *LoadSuite) TestLegacyKeepproxyConfig(c *check.C) {
+ f := "-legacy-keepproxy-config"
+ content := []byte(fmtKeepproxyConfig("", true))
+ cluster, err := testLoadLegacyConfig(content, f, c)
+
+ c.Check(err, check.IsNil)
+ c.Check(cluster, check.NotNil)
+ c.Check(cluster.Services.Controller.ExternalURL, check.Equals, arvados.URL{Scheme: "https", Host: "example.com"})
+ c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
+ c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+ c.Check(cluster.Services.Keepproxy.InternalURLs[arvados.URL{Host: ":80"}], check.Equals, arvados.ServiceInstance{})
+ c.Check(cluster.Collections.DefaultReplication, check.Equals, 0)
+ c.Check(cluster.API.KeepServiceRequestTimeout.String(), check.Equals, "15s")
+ c.Check(cluster.SystemLogs.LogLevel, check.Equals, "debug")
+
+ content = []byte(fmtKeepproxyConfig("", false))
+ cluster, err = testLoadLegacyConfig(content, f, c)
+ c.Check(cluster.SystemLogs.LogLevel, check.Equals, "info")
+
+ content = []byte(fmtKeepproxyConfig(`"DisableGet": true,`, true))
+ _, err = testLoadLegacyConfig(content, f, c)
+ c.Check(err, check.NotNil)
+
+ content = []byte(fmtKeepproxyConfig(`"DisablePut": true,`, true))
+ _, err = testLoadLegacyConfig(content, f, c)
+ c.Check(err, check.NotNil)
+
+ content = []byte(fmtKeepproxyConfig(`"PIDFile": "test",`, true))
+ _, err = testLoadLegacyConfig(content, f, c)
+ c.Check(err, check.NotNil)
+
+ content = []byte(fmtKeepproxyConfig(`"DisableGet": false, "DisablePut": false, "PIDFile": "",`, true))
+ _, err = testLoadLegacyConfig(content, f, c)
+ c.Check(err, check.IsNil)
+}
+
+func fmtKeepproxyConfig(param string, debugLog bool) string {
+ return fmt.Sprintf(`
+{
+ "Client": {
+ "Scheme": "",
+ "APIHost": "example.com",
+ "AuthToken": "abcdefg",
+ "Insecure": false
+ },
+ "Listen": ":80",
+ "DefaultReplicas": 0,
+ "Timeout": "15s",
+ "Debug": %t,
+ %s
+ "ManagementToken": "xyzzy"
+}
+`, debugLog, param)
+}
+
+func (s *LoadSuite) TestLegacyArvGitHttpdConfig(c *check.C) {
+ content := []byte(`
+{
+ "Client": {
+ "Scheme": "",
+ "APIHost": "example.com",
+ "AuthToken": "abcdefg",
+ },
+ "Listen": ":9000",
+ "GitCommand": "/test/git",
+ "GitoliteHome": "/test/gitolite",
+ "RepoRoot": "/test/reporoot",
+ "ManagementToken": "xyzzy"
+}
+`)
+ f := "-legacy-git-httpd-config"
+ cluster, err := testLoadLegacyConfig(content, f, c)
+
+ c.Check(err, check.IsNil)
+ c.Check(cluster, check.NotNil)
+ c.Check(cluster.Services.Controller.ExternalURL, check.Equals, arvados.URL{Scheme: "https", Host: "example.com"})
+ c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
+ c.Check(cluster.ManagementToken, check.Equals, "xyzzy")
+ c.Check(cluster.Git.GitCommand, check.Equals, "/test/git")
+ c.Check(cluster.Git.GitoliteHome, check.Equals, "/test/gitolite")
+ c.Check(cluster.Git.Repositories, check.Equals, "/test/reporoot")
+ c.Check(cluster.Services.Keepproxy.InternalURLs[arvados.URL{Host: ":9000"}], check.Equals, arvados.ServiceInstance{})
+}
"API.WebsocketClientEventQueue": false,
"API.SendTimeout": true,
"API.WebsocketServerEventQueue": false,
+ "API.KeepServiceRequestTimeout": false,
"AuditLogs": false,
"AuditLogs.MaxAge": false,
"AuditLogs.MaxDeleteBatch": false,
"Collections.PreserveVersionIfIdle": true,
"Collections.TrashSweepInterval": false,
"Collections.TrustAllContent": false,
+ "Collections.WebDAVCache": false,
"Containers": true,
"Containers.CloudVMs": false,
"Containers.CrunchRunCommand": false,
"InstanceTypes": true,
"InstanceTypes.*": true,
"InstanceTypes.*.*": true,
- "Login": false,
+ "Login": true,
+ "Login.ProviderAppSecret": false,
+ "Login.ProviderAppID": false,
+ "Login.LoginCluster": true,
+ "Login.RemoteTokenRefresh": true,
"Mail": false,
"ManagementToken": false,
"PostgreSQL": false,
WebsocketClientEventQueue: 64
WebsocketServerEventQueue: 4
+ # Timeout on requests to internal Keep services.
+ KeepServiceRequestTimeout: 15s
+
Users:
# Config parameters to automatically setup new users. If enabled,
# this users will be able to self-activate. Enable this if you want
# to run an open instance where anyone can create an account and use
# the system without requiring manual approval.
#
- # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
- # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+ # The params AutoSetupNewUsersWith* are meaningful only when AutoSetupNewUsers is turned on.
+ # AutoSetupUsernameBlacklist is a list of usernames to be blacklisted for auto setup.
AutoSetupNewUsers: false
AutoSetupNewUsersWithVmUUID: ""
AutoSetupNewUsersWithRepository: false
syslog: {}
SAMPLE: {}
- # When new_users_are_active is set to true, new users will be active
+ # When NewUsersAreActive is set to true, new users will be active
# immediately. This skips the "self-activate" step which enforces
# user agreements. Should only be enabled for development.
NewUsersAreActive: false
# should be an address associated with a Google account.
AutoAdminUserWithEmail: ""
- # If auto_admin_first_user is set to true, the first user to log in when no
+ # If AutoAdminFirstUser is set to true, the first user to log in when no
# other admin users exist will automatically become an admin user.
AutoAdminFirstUser: false
NewUserNotificationRecipients: {}
NewInactiveUserNotificationRecipients: {}
- # Set anonymous_user_token to enable anonymous user access. You can get
+ # Set AnonymousUserToken to enable anonymous user access. You can get
# the token by running "bundle exec ./script/get_anonymous_user_token.rb"
# in the directory where your API server is running.
AnonymousUserToken: ""
# Maximum number of log rows to delete in a single SQL transaction.
#
- # If max_audit_log_delete_batch is 0, log entries will never be
+ # If MaxDeleteBatch is 0, log entries will never be
# deleted by Arvados. Cleanup can be done by an external process
# without affecting any Arvados system processes, as long as very
# recent (<5 minutes old) logs are not deleted.
# identical to the permission key given to Keep. IMPORTANT: This is
# a site secret. It should be at least 50 characters.
#
- # Modifying blob_signing_key will invalidate all existing
+ # Modifying BlobSigningKey will invalidate all existing
# signatures, which can cause programs to fail (e.g., arv-put,
# arv-get, and Crunch jobs). To avoid errors, rotate keys only when
# no such processes are running.
# keepstore servers. Otherwise, reading data blocks and saving
# collections will fail with HTTP 403 permission errors.
#
- # Modifying blob_signature_ttl invalidates existing signatures; see
- # blob_signing_key note above.
+ # Modifying BlobSigningTTL invalidates existing signatures; see
+ # BlobSigningKey note above.
#
# The default is 2 weeks.
BlobSigningTTL: 336h
# Default lifetime for ephemeral collections: 2 weeks. This must not
- # be less than blob_signature_ttl.
+ # be less than BlobSigningTTL.
DefaultTrashLifetime: 336h
# Interval (seconds) between trash sweeps. During a trash sweep,
# If true, enable collection versioning.
# When a collection's preserve_version field is true or the current version
- # is older than the amount of seconds defined on preserve_version_if_idle,
+ # is older than the amount of seconds defined on PreserveVersionIfIdle,
# a snapshot of the collection's previous state is created and linked to
# the current collection.
CollectionVersioning: false
# The default setting (false) is appropriate for a multi-user site.
TrustAllContent: false
+ # Cache parameters for WebDAV content serving:
+ # * TTL: Maximum time to cache manifests and permission checks.
+ # * UUIDTTL: Maximum time to cache collection state.
+ # * MaxBlockEntries: Maximum number of block cache entries.
+ # * MaxCollectionEntries: Maximum number of collection cache entries.
+ # * MaxCollectionBytes: Approximate memory limit for collection cache.
+ # * MaxPermissionEntries: Maximum number of permission cache entries.
+ # * MaxUUIDEntries: Maximum number of UUID cache entries.
+ WebDAVCache:
+ TTL: 300s
+ UUIDTTL: 5s
+ MaxBlockEntries: 4
+ MaxCollectionEntries: 1000
+ MaxCollectionBytes: 100000000
+ MaxPermissionEntries: 1000
+ MaxUUIDEntries: 1000
+
Login:
- # These settings are provided by your OAuth2 provider (e.g.,
- # sso-provider).
+ # These settings are provided by your OAuth2 provider (eg
+ # Google) used to perform upstream authentication.
ProviderAppSecret: ""
ProviderAppID: ""
+ # The cluster ID to delegate the user database. When set,
+ # logins on this cluster will be redirected to the login cluster
+ # (login cluster must appear in RemoteClusters with Proxy: true)
+ LoginCluster: ""
+
+ # How long a cached token belonging to a remote cluster will
+ # remain valid before it needs to be revalidated.
+ RemoteTokenRefresh: 5m
+
Git:
+ # Path to git or gitolite-shell executable. Each authenticated
+ # request will execute this program with the single argument "http-backend"
+ GitCommand: /usr/bin/git
+
+ # Path to Gitolite's home directory. If a non-empty path is given,
+ # the CGI environment will be set up to support the use of
+ # gitolite-shell as a GitCommand: for example, if GitoliteHome is
+ # "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
+ # PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
+ GitoliteHome: ""
+
# Git repositories must be readable by api server, or you won't be
# able to submit crunch jobs. To pass the test suites, put a clone
# of the arvados tree in {git_repositories_dir}/arvados.git or
Path string
KeepstorePath string
+ KeepWebPath string
CrunchDispatchSlurmPath string
WebsocketPath string
+ KeepproxyPath string
+ GitHttpdPath string
configdata []byte
}
func (ldr *Loader) SetupFlags(flagset *flag.FlagSet) {
flagset.StringVar(&ldr.Path, "config", arvados.DefaultConfigFile, "Site configuration `file` (default may be overridden by setting an ARVADOS_CONFIG environment variable)")
flagset.StringVar(&ldr.KeepstorePath, "legacy-keepstore-config", defaultKeepstoreConfigPath, "Legacy keepstore configuration `file`")
+ flagset.StringVar(&ldr.KeepWebPath, "legacy-keepweb-config", defaultKeepWebConfigPath, "Legacy keep-web configuration `file`")
flagset.StringVar(&ldr.CrunchDispatchSlurmPath, "legacy-crunch-dispatch-slurm-config", defaultCrunchDispatchSlurmConfigPath, "Legacy crunch-dispatch-slurm configuration `file`")
flagset.StringVar(&ldr.WebsocketPath, "legacy-ws-config", defaultWebsocketConfigPath, "Legacy arvados-ws configuration `file`")
+ flagset.StringVar(&ldr.KeepproxyPath, "legacy-keepproxy-config", defaultKeepproxyConfigPath, "Legacy keepproxy configuration `file`")
+ flagset.StringVar(&ldr.GitHttpdPath, "legacy-git-httpd-config", defaultGitHttpdConfigPath, "Legacy arv-git-httpd configuration `file`")
flagset.BoolVar(&ldr.SkipLegacy, "skip-legacy", false, "Don't load legacy config files")
}
if legacyConfigArg != "-legacy-ws-config" {
ldr.WebsocketPath = ""
}
+ if legacyConfigArg != "-legacy-keepweb-config" {
+ ldr.KeepWebPath = ""
+ }
+ if legacyConfigArg != "-legacy-keepproxy-config" {
+ ldr.KeepproxyPath = ""
+ }
+ if legacyConfigArg != "-legacy-git-httpd-config" {
+ ldr.GitHttpdPath = ""
+ }
return munged
}
// legacy config file for the current component
for _, err := range []error{
ldr.loadOldKeepstoreConfig(&cfg),
+ ldr.loadOldKeepWebConfig(&cfg),
ldr.loadOldCrunchDispatchSlurmConfig(&cfg),
ldr.loadOldWebsocketConfig(&cfg),
+ ldr.loadOldKeepproxyConfig(&cfg),
+ ldr.loadOldGitHttpdConfig(&cfg),
} {
if err != nil {
return nil, err
}
}
+type WebDAVCacheConfig struct {
+ TTL Duration
+ UUIDTTL Duration
+ MaxBlockEntries int
+ MaxCollectionEntries int
+ MaxCollectionBytes int64
+ MaxPermissionEntries int
+ MaxUUIDEntries int
+}
type Cluster struct {
ClusterID string `json:"-"`
ManagementToken string
SendTimeout Duration
WebsocketClientEventQueue int
WebsocketServerEventQueue int
+ KeepServiceRequestTimeout Duration
}
AuditLogs struct {
MaxAge Duration
PreserveVersionIfIdle Duration
TrashSweepInterval Duration
TrustAllContent bool
+
+ WebDAVCache WebDAVCacheConfig
}
Git struct {
+ GitCommand string
+ GitoliteHome string
Repositories string
}
Login struct {
- ProviderAppSecret string
- ProviderAppID string
+ ProviderAppSecret string
+ ProviderAppID string
+ LoginCluster string
+ RemoteTokenRefresh Duration
}
Mail struct {
MailchimpAPIKey string
from arvados.collection import CollectionReader
HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
+CR_UNCOMMITTED = 'Uncommitted'
+CR_COMMITTED = 'Committed'
+CR_FINAL = 'Final'
keep_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*')
signed_locator_pattern = re.compile(r'[0-9a-f]{32}\+\d+(\+\S+)*\+A\S+(\+\S+)*')
ssl_certificate_key "{{SSLKEY}}";
location / {
proxy_pass http://keep-web;
+ proxy_set_header Host $http_host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto https;
+ proxy_redirect off;
client_max_body_size 0;
proxy_http_version 1.1;
proxy_request_buffering off;
-
- # Unlike other proxy sections, here we need to override the
- # requested Host header and use proxy_redirect because of the
- # way the test suite orchestrates services. Keep-web's "download
- # only" behavior relies on the Host header matching a configured
- # value, but when run_test_servers.py writes keep-web's command
- # line, the keep-web-dl TLS port (which clients will connect to
- # and include in their Host header) has not yet been assigned.
- #
- # In production, "proxy_set_header Host $http_host;
- # proxy_redirect off;" works: keep-web's redirect URLs will
- # match the request URL received by Nginx.
- #
- # Here, keep-web will issue redirects to https://download/ and
- # Nginx will rewrite them.
- #
- proxy_set_header Host download;
- proxy_redirect https://download/ https://$host:{{KEEPWEBDLSSLPORT}}/;
}
}
upstream ws {
env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
logf = open(_logfilename('keepproxy'), 'a')
kp = subprocess.Popen(
- ['keepproxy',
- '-pid='+_pidfile('keepproxy'),
- '-listen=:{}'.format(port)],
- env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
+ ['keepproxy'], env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
+
+ with open(_pidfile('keepproxy'), 'w') as f:
+ f.write(str(kp.pid))
+ _wait_until_port_listens(port)
print("Using API %s token %s" % (os.environ['ARVADOS_API_HOST'], auth_token('admin')), file=sys.stdout)
api = arvados.api(
return
stop_arv_git_httpd()
- gitdir = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
gitport = internal_port_from_config("GitHTTP")
env = os.environ.copy()
env.pop('ARVADOS_API_TOKEN', None)
logf = open(_logfilename('arv-git-httpd'), 'a')
- agh = subprocess.Popen(
- ['arv-git-httpd',
- '-repo-root='+gitdir+'/test',
- '-management-token=e687950a23c3a9bceec28c6223a06c79',
- '-address=:'+str(gitport)],
+ agh = subprocess.Popen(['arv-git-httpd'],
env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
with open(_pidfile('arv-git-httpd'), 'w') as f:
f.write(str(agh.pid))
keepwebport = internal_port_from_config("WebDAV")
env = os.environ.copy()
- env['ARVADOS_API_TOKEN'] = auth_token('anonymous')
logf = open(_logfilename('keep-web'), 'a')
keepweb = subprocess.Popen(
- ['keep-web',
- '-allow-anonymous',
- '-attachment-only-host=download',
- '-management-token=e687950a23c3a9bceec28c6223a06c79',
- '-listen=:'+str(keepwebport)],
+ ['keep-web'],
env=env, stdin=open('/dev/null'), stdout=logf, stderr=logf)
with open(_pidfile('keep-web'), 'w') as f:
f.write(str(keepweb.pid))
"TLS": {
"Insecure": True
},
- "Services": services
+ "Services": services,
+ "Users": {
+ "AnonymousUserToken": auth_token('anonymous')
+ },
+ "Collections": {
+ "TrustAllContent": True
+ },
+ "Git": {
+ "Repositories": "%s/test" % os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
+ }
}
}
}
# omniauth callback method
def create
+ if !Rails.configuration.Login.LoginCluster.empty? and Rails.configuration.Login.LoginCluster != Rails.configuration.ClusterID
+ raise "Local login disabled when LoginCluster is set"
+ end
+
omniauth = request.env['omniauth.auth']
identity_url_ok = (omniauth['info']['identity_url'].length > 0) rescue false
end
p = []
p << "auth_provider=#{CGI.escape(params[:auth_provider])}" if params[:auth_provider]
- if params[:return_to]
- # Encode remote param inside callback's return_to, so that we'll get it on
- # create() after login.
- remote_param = params[:remote].nil? ? '' : params[:remote]
- p << "return_to=#{CGI.escape(remote_param + ',' + params[:return_to])}"
+
+ if !Rails.configuration.Login.LoginCluster.empty? and Rails.configuration.Login.LoginCluster != Rails.configuration.ClusterID
+ host = ApiClientAuthorization.remote_host(uuid_prefix: Rails.configuration.Login.LoginCluster)
+ if not host
+ raise "LoginCluster #{Rails.configuration.Login.LoginCluster} missing from RemoteClusters"
+ end
+ scheme = "https"
+ cluster = Rails.configuration.RemoteClusters[Rails.configuration.Login.LoginCluster]
+ if cluster and cluster['Scheme'] and !cluster['Scheme'].empty?
+ scheme = cluster['Scheme']
+ end
+ login_cluster = "#{scheme}://#{host}"
+ p << "remote=#{CGI.escape(params[:remote])}" if params[:remote]
+ p << "return_to=#{CGI.escape(params[:return_to])}" if params[:return_to]
+ redirect_to "#{login_cluster}/login?#{p.join('&')}"
+ else
+ if params[:return_to]
+ # Encode remote param inside callback's return_to, so that we'll get it on
+ # create() after login.
+ remote_param = params[:remote].nil? ? '' : params[:remote]
+ p << "return_to=#{CGI.escape(remote_param + ',' + params[:return_to])}"
+ end
+ redirect_to "/auth/joshid?#{p.join('&')}"
end
- redirect_to "/auth/joshid?#{p.join('&')}"
end
def send_api_token_to(callback_url, user, remote=nil)
end
def self.remote_host(uuid_prefix:)
- (Rails.configuration.RemoteClusters[uuid_prefix].andand.Host) ||
- (Rails.configuration.RemoteClusters["*"].Proxy &&
+ (Rails.configuration.RemoteClusters[uuid_prefix].andand["Host"]) ||
+ (Rails.configuration.RemoteClusters["*"]["Proxy"] &&
uuid_prefix+".arvadosapi.com")
end
+ def self.make_http_client
+ clnt = HTTPClient.new
+ if Rails.configuration.TLS.Insecure
+ clnt.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
+ else
+ # Use system CA certificates
+ ["/etc/ssl/certs/ca-certificates.crt",
+ "/etc/pki/tls/certs/ca-bundle.crt"]
+ .select { |ca_path| File.readable?(ca_path) }
+ .each { |ca_path| clnt.ssl_config.add_trust_ca(ca_path) }
+ end
+ clnt
+ end
+
def self.validate(token:, remote: nil)
return nil if !token
remote ||= Rails.configuration.ClusterID
case token[0..2]
when 'v2/'
- _, uuid, secret, optional = token.split('/')
- unless uuid.andand.length == 27 && secret.andand.length.andand > 0
+ _, token_uuid, secret, optional = token.split('/')
+ unless token_uuid.andand.length == 27 && secret.andand.length.andand > 0
return nil
end
# matches expections.
c = Container.where(uuid: optional).first
if !c.nil?
- if !c.auth_uuid.nil? and c.auth_uuid != uuid
+ if !c.auth_uuid.nil? and c.auth_uuid != token_uuid
# token doesn't match the container's token
return nil
end
- if !c.runtime_token.nil? and "v2/#{uuid}/#{secret}" != c.runtime_token
+ if !c.runtime_token.nil? and "v2/#{token_uuid}/#{secret}" != c.runtime_token
# token doesn't match the container's token
return nil
end
end
end
+ # fast path: look up the token in the local database
auth = ApiClientAuthorization.
includes(:user, :api_client).
- where('uuid=? and (expires_at is null or expires_at > CURRENT_TIMESTAMP)', uuid).
+ where('uuid=? and (expires_at is null or expires_at > CURRENT_TIMESTAMP)', token_uuid).
first
if auth && auth.user &&
(secret == auth.api_token ||
secret == OpenSSL::HMAC.hexdigest('sha1', auth.api_token, remote))
+ # found it
return auth
end
- uuid_prefix = uuid[0..4]
- if uuid_prefix == Rails.configuration.ClusterID
- # If the token were valid, we would have validated it above
+ token_uuid_prefix = token_uuid[0..4]
+ if token_uuid_prefix == Rails.configuration.ClusterID
+ # Token is supposedly issued by local cluster, but if the
+ # token were valid, it would have been found in the database
+ # in the above query.
return nil
- elsif uuid_prefix.length != 5
+ elsif token_uuid_prefix.length != 5
# malformed
return nil
end
- host = remote_host(uuid_prefix: uuid_prefix)
+ # Invariant: token_uuid_prefix != Rails.configuration.ClusterID
+ #
+ # In other words the remaining code in this method below is the
+ # case that determines whether to accept a token that was issued
+ # by a remote cluster when the token is absent or expired in our
+ # database. To begin, we need to ask the cluster that issued
+ # the token to [re]validate it.
+ clnt = ApiClientAuthorization.make_http_client
+
+ host = remote_host(uuid_prefix: token_uuid_prefix)
if !host
- Rails.logger.warn "remote authentication rejected: no host for #{uuid_prefix.inspect}"
+ Rails.logger.warn "remote authentication rejected: no host for #{token_uuid_prefix.inspect}"
return nil
end
- # Token was issued by a different cluster. If it's expired or
- # missing in our database, ask the originating cluster to
- # [re]validate it.
begin
- clnt = HTTPClient.new
- if Rails.configuration.TLS.Insecure
- clnt.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
- else
- # Use system CA certificates
- ["/etc/ssl/certs/ca-certificates.crt",
- "/etc/pki/tls/certs/ca-bundle.crt"]
- .select { |ca_path| File.readable?(ca_path) }
- .each { |ca_path| clnt.ssl_config.add_trust_ca(ca_path) }
- end
remote_user = SafeJSON.load(
clnt.get_content('https://' + host + '/arvados/v1/users/current',
{'remote' => Rails.configuration.ClusterID},
Rails.logger.warn "remote authentication with token #{token.inspect} failed: #{e}"
return nil
end
- if !remote_user.is_a?(Hash) || !remote_user['uuid'].is_a?(String) || remote_user['uuid'][0..4] != uuid[0..4]
+
+ # Check the response is well formed.
+ if !remote_user.is_a?(Hash) || !remote_user['uuid'].is_a?(String)
Rails.logger.warn "remote authentication rejected: remote_user=#{remote_user.inspect}"
return nil
end
- act_as_system_user do
- # Add/update user and token in our database so we can
- # validate subsequent requests faster.
-
- user = User.find_or_create_by(uuid: remote_user['uuid']) do |user|
- # (this block runs for the "create" case, not for "find")
- user.is_admin = false
- user.email = remote_user['email']
- if remote_user['username'].andand.length.andand > 0
- user.set_initial_username(requested: remote_user['username'])
- end
- end
+ remote_user_prefix = remote_user['uuid'][0..4]
+
+ # Clusters can only authenticate for their own users.
+ if remote_user_prefix != token_uuid_prefix
+ Rails.logger.warn "remote authentication rejected: claimed remote user #{remote_user_prefix} but token was issued by #{token_uuid_prefix}"
+ return nil
+ end
+
+ # Invariant: remote_user_prefix == token_uuid_prefix
+ # therefore: remote_user_prefix != Rails.configuration.ClusterID
+
+ # Add or update user and token in local database so we can
+ # validate subsequent requests faster.
+
+ user = User.find_by_uuid(remote_user['uuid'])
+
+ if !user
+ # Create a new record for this user.
+ user = User.new(uuid: remote_user['uuid'],
+ is_active: false,
+ is_admin: false,
+ email: remote_user['email'],
+ owner_uuid: system_user_uuid)
+ user.set_initial_username(requested: remote_user['username'])
+ end
+
+ # Sync user record.
+ if remote_user_prefix == Rails.configuration.Login.LoginCluster
+ # Remote cluster controls our user database, copy both
+ # 'is_active' and 'is_admin'
+ user.is_active = remote_user['is_active']
+ user.is_admin = remote_user['is_admin']
+ else
if Rails.configuration.Users.NewUsersAreActive ||
- Rails.configuration.RemoteClusters[remote_user['uuid'][0..4]].andand["ActivateUsers"]
- # Update is_active to whatever it is at the remote end
+ Rails.configuration.RemoteClusters[remote_user_prefix].andand["ActivateUsers"]
+ # Default policy is to activate users, so match the
+ # activation state of the remote record.
user.is_active = remote_user['is_active']
elsif !remote_user['is_active']
- # Remote user is inactive; our mirror should be, too.
+ # Deactivate user if the remote is inactive, otherwise don't
+ # change 'is_active'.
user.is_active = false
end
+ end
- %w[first_name last_name email prefs].each do |attr|
- user.send(attr+'=', remote_user[attr])
- end
+ %w[first_name last_name email prefs].each do |attr|
+ user.send(attr+'=', remote_user[attr])
+ end
+ act_as_system_user do
user.save!
- auth = ApiClientAuthorization.find_or_create_by(uuid: uuid) do |auth|
+ # We will accept this token (and avoid reloading the user
+ # record) for 'RemoteTokenRefresh' (default 5 minutes).
+ # Possible todo:
+ # Request the actual api_client_auth record from the remote
+ # server in case it wants the token to expire sooner.
+ auth = ApiClientAuthorization.find_or_create_by(uuid: token_uuid) do |auth|
auth.user = user
- auth.api_token = secret
auth.api_client_id = 0
end
-
- # Accept this token (and don't reload the user record) for
- # 5 minutes. TODO: Request the actual api_client_auth
- # record from the remote server in case it wants the token
- # to expire sooner.
auth.update_attributes!(user: user,
api_token: secret,
api_client_id: 0,
- expires_at: Time.now + 5.minutes)
+ expires_at: Time.now + Rails.configuration.Login.RemoteTokenRefresh)
end
return auth
else
+ # token is not a 'v2' token
auth = ApiClientAuthorization.
- includes(:user, :api_client).
- where('api_token=? and (expires_at is null or expires_at > CURRENT_TIMESTAMP)', token).
- first
+ includes(:user, :api_client).
+ where('api_token=? and (expires_at is null or expires_at > CURRENT_TIMESTAMP)', token).
+ first
if auth && auth.user
return auth
end
end
+
return nil
end
arvcfg.declare_config "Users.NewInactiveUserNotificationRecipients", Hash, :new_inactive_user_notification_recipients, method(:arrayToHash)
arvcfg.declare_config "Login.ProviderAppSecret", NonemptyString, :sso_app_secret
arvcfg.declare_config "Login.ProviderAppID", NonemptyString, :sso_app_id
+arvcfg.declare_config "Login.LoginCluster", String
+arvcfg.declare_config "Login.RemoteTokenRefresh", ActiveSupport::Duration
arvcfg.declare_config "TLS.Insecure", Boolean, :sso_insecure
arvcfg.declare_config "Services.SSO.ExternalURL", NonemptyString, :sso_provider_url
arvcfg.declare_config "AuditLogs.MaxAge", ActiveSupport::Duration, :max_audit_log_age
case "$TARGET" in
centos*)
- fpm_depends+=(libcurl-devel postgresql-devel arvados-server)
+ fpm_depends+=(libcurl-devel postgresql-devel)
;;
debian* | ubuntu*)
- fpm_depends+=(libcurl-ssl-dev libpq-dev g++ arvados-server)
+ fpm_depends+=(libcurl-ssl-dev libpq-dev g++)
;;
esac
+++ /dev/null
-#!/usr/bin/env ruby
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-puts "**************************************
-The jobs API (crunch v1) is no longer supported. This is a stub
-script that exists only to assist in a smooth upgrade. You should
-remove crunch-dispatch.rb from your init configuration. This script
-will now sleep forever.
-**************************************
-"
-
-while true do
- sleep 10
-end
class UserSessionsControllerTest < ActionController::TestCase
- test "new user from new api client" do
+ test "redirect to joshid" do
+ api_client_page = 'http://client.example.com/home'
+ get :login, params: {return_to: api_client_page}
+ assert_response :redirect
+ assert_equal("http://test.host/auth/joshid?return_to=%2Chttp%3A%2F%2Fclient.example.com%2Fhome", @response.redirect_url)
+ assert_nil assigns(:api_client)
+ end
+
+
+ test "send token when user is already logged in" do
authorize_with :inactive
api_client_page = 'http://client.example.com/home'
get :login, params: {return_to: api_client_page}
get :login, params: {return_to: api_client_page, remote: remote_prefix}
assert_response 400
end
+
+ test "login to LoginCluster" do
+ Rails.configuration.Login.LoginCluster = 'zbbbb'
+ Rails.configuration.RemoteClusters['zbbbb'] = {'Host' => 'zbbbb.example.com'}
+ api_client_page = 'http://client.example.com/home'
+ get :login, params: {return_to: api_client_page}
+ assert_response :redirect
+ assert_equal("https://zbbbb.example.com/login?return_to=http%3A%2F%2Fclient.example.com%2Fhome", @response.redirect_url)
+ assert_nil assigns(:api_client)
+ end
+
+ test "don't go into redirect loop if LoginCluster is self" do
+ Rails.configuration.Login.LoginCluster = 'zzzzz'
+ api_client_page = 'http://client.example.com/home'
+ get :login, params: {return_to: api_client_page}
+ assert_response :redirect
+ assert_equal("http://test.host/auth/joshid?return_to=%2Chttp%3A%2F%2Fclient.example.com%2Fhome", @response.redirect_url)
+ assert_nil assigns(:api_client)
+ end
+
end
@controller = Arvados::V1::UsersController.new
ready = Thread::Queue.new
- srv = WEBrick::HTTPServer.new(
- Port: 0,
- Logger: WEBrick::Log.new(
- Rails.root.join("log", "webrick.log").to_s,
- WEBrick::Log::INFO),
- AccessLog: [[File.open(Rails.root.join(
- "log", "webrick_access.log").to_s, 'a+'),
- WEBrick::AccessLog::COMBINED_LOG_FORMAT]],
- SSLEnable: true,
- SSLVerifyClient: OpenSSL::SSL::VERIFY_NONE,
- SSLPrivateKey: OpenSSL::PKey::RSA.new(
- File.open(Rails.root.join("tmp", "self-signed.key")).read),
- SSLCertificate: OpenSSL::X509::Certificate.new(
- File.open(Rails.root.join("tmp", "self-signed.pem")).read),
- SSLCertName: [["CN", WEBrick::Utils::getservername]],
- StartCallback: lambda { ready.push(true) })
- srv.mount_proc '/discovery/v1/apis/arvados/v1/rest' do |req, res|
- Rails.cache.delete 'arvados_v1_rest_discovery'
- res.body = Arvados::V1::SchemaController.new.send(:discovery_doc).to_json
- end
- srv.mount_proc '/arvados/v1/users/current' do |req, res|
- res.status = @stub_status
- res.body = @stub_content.is_a?(String) ? @stub_content : @stub_content.to_json
- end
- Thread.new do
- srv.start
+
+ @remote_server = []
+ @remote_host = []
+
+ ['zbbbb', 'zbork'].each do |clusterid|
+ srv = WEBrick::HTTPServer.new(
+ Port: 0,
+ Logger: WEBrick::Log.new(
+ Rails.root.join("log", "webrick.log").to_s,
+ WEBrick::Log::INFO),
+ AccessLog: [[File.open(Rails.root.join(
+ "log", "webrick_access.log").to_s, 'a+'),
+ WEBrick::AccessLog::COMBINED_LOG_FORMAT]],
+ SSLEnable: true,
+ SSLVerifyClient: OpenSSL::SSL::VERIFY_NONE,
+ SSLPrivateKey: OpenSSL::PKey::RSA.new(
+ File.open(Rails.root.join("tmp", "self-signed.key")).read),
+ SSLCertificate: OpenSSL::X509::Certificate.new(
+ File.open(Rails.root.join("tmp", "self-signed.pem")).read),
+ SSLCertName: [["CN", WEBrick::Utils::getservername]],
+ StartCallback: lambda { ready.push(true) })
+ srv.mount_proc '/discovery/v1/apis/arvados/v1/rest' do |req, res|
+ Rails.cache.delete 'arvados_v1_rest_discovery'
+ res.body = Arvados::V1::SchemaController.new.send(:discovery_doc).to_json
+ end
+ srv.mount_proc '/arvados/v1/users/current' do |req, res|
+ if clusterid == 'zbbbb' and req.header['authorization'][0][10..14] == 'zbork'
+ # asking zbbbb about zbork should yield an error, zbbbb doesn't trust zbork
+ res.status = 401
+ return
+ end
+ res.status = @stub_status
+ res.body = @stub_content.is_a?(String) ? @stub_content : @stub_content.to_json
+ end
+ srv.mount_proc '/arvados/v1/users/register' do |req, res|
+ res.status = @stub_status
+ res.body = @stub_content.is_a?(String) ? @stub_content : @stub_content.to_json
+ end
+ Thread.new do
+ srv.start
+ end
+ ready.pop
+ @remote_server << srv
+ @remote_host << "127.0.0.1:#{srv.config[:Port]}"
end
- ready.pop
- @remote_server = srv
- @remote_host = "127.0.0.1:#{srv.config[:Port]}"
- Rails.configuration.RemoteClusters = Rails.configuration.RemoteClusters.merge({zbbbb: ActiveSupport::InheritableOptions.new({Host: @remote_host}),
- zbork: ActiveSupport::InheritableOptions.new({Host: @remote_host})})
- Arvados::V1::SchemaController.any_instance.stubs(:root_url).returns "https://#{@remote_host}"
+ Rails.configuration.RemoteClusters = Rails.configuration.RemoteClusters.merge({zbbbb: ActiveSupport::InheritableOptions.new({Host: @remote_host[0]}),
+ zbork: ActiveSupport::InheritableOptions.new({Host: @remote_host[1]})})
+ Arvados::V1::SchemaController.any_instance.stubs(:root_url).returns "https://#{@remote_host[0]}"
@stub_status = 200
@stub_content = {
uuid: 'zbbbb-tpzed-000000000000000',
end
teardown do
- @remote_server.andand.stop
+ @remote_server.each do |srv|
+ srv.stop
+ end
end
test 'authenticate with remote token' do
assert_equal 'foo', json_response['username']
end
- test 'authenticate with remote token from misbhehaving remote cluster' do
+ test 'authenticate with remote token from misbehaving remote cluster' do
get '/arvados/v1/users/current',
params: {format: 'json'},
headers: auth(remote: 'zbork')
assert_equal 'barney', json_response['username']
end
+ test 'get user from Login cluster' do
+ Rails.configuration.Login.LoginCluster = 'zbbbb'
+ get '/arvados/v1/users/current',
+ params: {format: 'json'},
+ headers: auth(remote: 'zbbbb')
+ assert_response :success
+ assert_equal 'zbbbb-tpzed-000000000000000', json_response['uuid']
+ assert_equal true, json_response['is_admin']
+ assert_equal true, json_response['is_active']
+ assert_equal 'foo@example.com', json_response['email']
+ assert_equal 'barney', json_response['username']
+ end
+
test 'pre-activate remote user' do
post '/arvados/v1/users',
params: {
Description=Arvados git server
Documentation=https://doc.arvados.org/
After=network.target
-AssertPathExists=/etc/arvados/git-httpd/git-httpd.yml
# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
StartLimitInterval=0
"sync"
"time"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/auth"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
type authHandler struct {
handler http.Handler
clientPool *arvadosclient.ClientPool
+ cluster *arvados.Cluster
setupOnce sync.Once
}
func (h *authHandler) setup() {
- ac, err := arvadosclient.New(&theConfig.Client)
+ client, err := arvados.NewClientFromConfig(h.cluster)
if err != nil {
log.Fatal(err)
}
+
+ ac, err := arvadosclient.New(client)
+ if err != nil {
+ log.Fatalf("Error setting up arvados client prototype %v", err)
+ }
+
h.clientPool = &arvadosclient.ClientPool{Prototype: ac}
}
"/" + repoName + "/.git",
}
for _, dir := range tryDirs {
- if fileInfo, err := os.Stat(theConfig.RepoRoot + dir); err != nil {
+ if fileInfo, err := os.Stat(h.cluster.Git.Repositories + dir); err != nil {
if !os.IsNotExist(err) {
statusCode, statusText = http.StatusInternalServerError, err.Error()
return
}
if rewrittenPath == "" {
log.Println("WARNING:", repoUUID,
- "git directory not found in", theConfig.RepoRoot, tryDirs)
+ "git directory not found in", h.cluster.Git.Repositories, tryDirs)
// We say "content not found" to disambiguate from the
// earlier "API says that repo does not exist" error.
statusCode, statusText = http.StatusNotFound, "content not found"
"path/filepath"
"strings"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
check "gopkg.in/check.v1"
var _ = check.Suite(&AuthHandlerSuite{})
-type AuthHandlerSuite struct{}
+type AuthHandlerSuite struct {
+ cluster *arvados.Cluster
+}
func (s *AuthHandlerSuite) SetUpSuite(c *check.C) {
arvadostest.StartAPI()
arvadostest.ResetEnv()
repoRoot, err := filepath.Abs("../api/tmp/git/test")
c.Assert(err, check.IsNil)
- theConfig = &Config{
- Client: arvados.Client{
- APIHost: arvadostest.APIHost(),
- Insecure: true,
- },
- Listen: ":0",
- GitCommand: "/usr/bin/git",
- RepoRoot: repoRoot,
- ManagementToken: arvadostest.ManagementToken,
- }
+
+ cfg, err := config.NewLoader(nil, nil).Load()
+ c.Assert(err, check.Equals, nil)
+ s.cluster, err = cfg.GetCluster("")
+ c.Assert(err, check.Equals, nil)
+
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+ s.cluster.TLS.Insecure = true
+ s.cluster.Git.GitCommand = "/usr/bin/git"
+ s.cluster.Git.Repositories = repoRoot
}
func (s *AuthHandlerSuite) TestPermission(c *check.C) {
h := &authHandler{handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
log.Printf("%v", r.URL)
io.WriteString(w, r.URL.Path)
- })}
+ }), cluster: s.cluster}
baseURL, err := url.Parse("http://git.example/")
c.Assert(err, check.IsNil)
for _, trial := range []struct {
}
func (s *AuthHandlerSuite) TestCORS(c *check.C) {
- h := &authHandler{}
+ h := &authHandler{cluster: s.cluster}
// CORS preflight
resp := httptest.NewRecorder()
"net/http"
"net/http/cgi"
"os"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
)
// gitHandler is an http.Handler that invokes git-http-backend (or
cgi.Handler
}
-func newGitHandler() http.Handler {
+func newGitHandler(cluster *arvados.Cluster) http.Handler {
const glBypass = "GL_BYPASS_ACCESS_CHECKS"
const glHome = "GITOLITE_HTTP_HOME"
var env []string
path := os.Getenv("PATH")
- if theConfig.GitoliteHome != "" {
+ if cluster.Git.GitoliteHome != "" {
env = append(env,
- glHome+"="+theConfig.GitoliteHome,
+ glHome+"="+cluster.Git.GitoliteHome,
glBypass+"=1")
- path = path + ":" + theConfig.GitoliteHome + "/bin"
+ path = path + ":" + cluster.Git.GitoliteHome + "/bin"
} else if home, bypass := os.Getenv(glHome), os.Getenv(glBypass); home != "" || bypass != "" {
env = append(env, glHome+"="+home, glBypass+"="+bypass)
log.Printf("DEPRECATED: Passing through %s and %s environment variables. Use GitoliteHome configuration instead.", glHome, glBypass)
}
+
+ var listen arvados.URL
+ for listen = range cluster.Services.GitHTTP.InternalURLs {
+ break
+ }
env = append(env,
- "GIT_PROJECT_ROOT="+theConfig.RepoRoot,
+ "GIT_PROJECT_ROOT="+cluster.Git.Repositories,
"GIT_HTTP_EXPORT_ALL=",
- "SERVER_ADDR="+theConfig.Listen,
+ "SERVER_ADDR="+listen.Host,
"PATH="+path)
return &gitHandler{
Handler: cgi.Handler{
- Path: theConfig.GitCommand,
- Dir: theConfig.RepoRoot,
+ Path: cluster.Git.GitCommand,
+ Dir: cluster.Git.Repositories,
Env: env,
Args: []string{"http-backend"},
},
"net/url"
"regexp"
+ "git.curoverse.com/arvados.git/lib/config"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
check "gopkg.in/check.v1"
)
var _ = check.Suite(&GitHandlerSuite{})
-type GitHandlerSuite struct{}
+type GitHandlerSuite struct {
+ cluster *arvados.Cluster
+}
-func (s *GitHandlerSuite) TestEnvVars(c *check.C) {
- theConfig = defaultConfig()
- theConfig.RepoRoot = "/"
- theConfig.GitoliteHome = "/test/ghh"
+func (s *GitHandlerSuite) SetUpTest(c *check.C) {
+ cfg, err := config.NewLoader(nil, nil).Load()
+ c.Assert(err, check.Equals, nil)
+ s.cluster, err = cfg.GetCluster("")
+ c.Assert(err, check.Equals, nil)
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:80"}: arvados.ServiceInstance{}}
+ s.cluster.Git.GitoliteHome = "/test/ghh"
+ s.cluster.Git.Repositories = "/"
+}
+
+func (s *GitHandlerSuite) TestEnvVars(c *check.C) {
u, err := url.Parse("git.zzzzz.arvadosapi.com/test")
c.Check(err, check.Equals, nil)
resp := httptest.NewRecorder()
URL: u,
RemoteAddr: "[::1]:12345",
}
- h := newGitHandler()
+ h := newGitHandler(s.cluster)
h.(*gitHandler).Path = "/bin/sh"
h.(*gitHandler).Args = []string{"-c", "printf 'Content-Type: text/plain\r\n\r\n'; env"}
c.Check(body, check.Matches, `(?ms).*^GL_BYPASS_ACCESS_CHECKS=1$.*`)
c.Check(body, check.Matches, `(?ms).*^REMOTE_HOST=::1$.*`)
c.Check(body, check.Matches, `(?ms).*^REMOTE_PORT=12345$.*`)
- c.Check(body, check.Matches, `(?ms).*^SERVER_ADDR=`+regexp.QuoteMeta(theConfig.Listen)+`$.*`)
+ c.Check(body, check.Matches, `(?ms).*^SERVER_ADDR=`+regexp.QuoteMeta("localhost:80")+`$.*`)
}
func (s *GitHandlerSuite) TestCGIErrorOnSplitHostPortError(c *check.C) {
URL: u,
RemoteAddr: "test.bad.address.missing.port",
}
- h := newGitHandler()
+ h := newGitHandler(s.cluster)
h.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusInternalServerError)
c.Check(resp.Body.String(), check.Equals, "")
"os/exec"
"strings"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
- "git.curoverse.com/arvados.git/sdk/go/arvadostest"
check "gopkg.in/check.v1"
)
runGitolite("gitolite", "setup", "--admin", "root")
s.tmpRepoRoot = s.gitoliteHome + "/repositories"
- s.Config = &Config{
- Client: arvados.Client{
- APIHost: arvadostest.APIHost(),
- Insecure: true,
- },
- Listen: "localhost:0",
- GitCommand: "/usr/share/gitolite3/gitolite-shell",
- GitoliteHome: s.gitoliteHome,
- RepoRoot: s.tmpRepoRoot,
- }
+
+ cfg, err := config.NewLoader(nil, nil).Load()
+ c.Assert(err, check.Equals, nil)
+ s.cluster, err = cfg.GetCluster("")
+ c.Assert(err, check.Equals, nil)
+
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+ s.cluster.TLS.Insecure = true
+ s.cluster.Git.GitCommand = "/usr/share/gitolite3/gitolite-shell"
+ s.cluster.Git.GitoliteHome = s.gitoliteHome
+ s.cluster.Git.Repositories = s.tmpRepoRoot
+
s.IntegrationSuite.SetUpTest(c)
// Install the gitolite hooks in the bare repo we made in
"strings"
"testing"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
check "gopkg.in/check.v1"
tmpRepoRoot string
tmpWorkdir string
testServer *server
- Config *Config
+ cluster *arvados.Cluster
}
func (s *IntegrationSuite) SetUpSuite(c *check.C) {
func (s *IntegrationSuite) SetUpTest(c *check.C) {
arvadostest.ResetEnv()
- s.testServer = &server{}
+
var err error
if s.tmpRepoRoot == "" {
s.tmpRepoRoot, err = ioutil.TempDir("", "arv-git-httpd")
_, err = exec.Command("sh", "-c", "cd "+s.tmpWorkdir+" && echo work >work && git add work && git -c user.name=Foo -c user.email=Foo commit -am 'workdir: test'").CombinedOutput()
c.Assert(err, check.Equals, nil)
+ if s.cluster == nil {
+ cfg, err := config.NewLoader(nil, nil).Load()
+ c.Assert(err, check.Equals, nil)
+ s.cluster, err = cfg.GetCluster("")
+ c.Assert(err, check.Equals, nil)
+
+ s.cluster.Services.GitHTTP.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: "localhost:0"}: arvados.ServiceInstance{}}
+ s.cluster.TLS.Insecure = true
+ s.cluster.Git.GitCommand = "/usr/bin/git"
+ s.cluster.Git.Repositories = s.tmpRepoRoot
+ s.cluster.ManagementToken = arvadostest.ManagementToken
+ }
+
+ s.testServer = &server{cluster: s.cluster}
+ err = s.testServer.Start()
+ c.Assert(err, check.Equals, nil)
+
_, err = exec.Command("git", "config",
"--file", s.tmpWorkdir+"/.git/config",
"credential.http://"+s.testServer.Addr+"/.helper",
"none").Output()
c.Assert(err, check.Equals, nil)
- if s.Config == nil {
- s.Config = &Config{
- Client: arvados.Client{
- APIHost: arvadostest.APIHost(),
- Insecure: true,
- },
- Listen: "localhost:0",
- GitCommand: "/usr/bin/git",
- RepoRoot: s.tmpRepoRoot,
- ManagementToken: arvadostest.ManagementToken,
- }
- }
-
// Clear ARVADOS_API_* env vars before starting up the server,
// to make sure arv-git-httpd doesn't use them or complain
// about them being missing.
os.Unsetenv("ARVADOS_API_HOST")
os.Unsetenv("ARVADOS_API_HOST_INSECURE")
os.Unsetenv("ARVADOS_API_TOKEN")
-
- theConfig = s.Config
- err = s.testServer.Start()
- c.Assert(err, check.Equals, nil)
}
func (s *IntegrationSuite) TearDownTest(c *check.C) {
}
s.tmpWorkdir = ""
- s.Config = nil
-
- theConfig = defaultConfig()
+ s.cluster = nil
}
func (s *IntegrationSuite) RunGit(c *check.C, token, gitCmd, repo string, args ...string) error {
package main
import (
- "encoding/json"
"flag"
"fmt"
- "log"
"os"
- "regexp"
- "git.curoverse.com/arvados.git/sdk/go/arvados"
- "git.curoverse.com/arvados.git/sdk/go/config"
+ "git.curoverse.com/arvados.git/lib/config"
"github.com/coreos/go-systemd/daemon"
+ "github.com/ghodss/yaml"
+ log "github.com/sirupsen/logrus"
)
var version = "dev"
-// Server configuration
-type Config struct {
- Client arvados.Client
- Listen string
- GitCommand string
- RepoRoot string
- GitoliteHome string
- ManagementToken string
-}
-
-var theConfig = defaultConfig()
-
-func defaultConfig() *Config {
- return &Config{
- Listen: ":80",
- GitCommand: "/usr/bin/git",
- RepoRoot: "/var/lib/arvados/git/repositories",
- }
-}
-
func main() {
- const defaultCfgPath = "/etc/arvados/git-httpd/git-httpd.yml"
- const deprecated = " (DEPRECATED -- use config file instead)"
- flag.StringVar(&theConfig.Listen, "address", theConfig.Listen,
- "Address to listen on, \"host:port\" or \":port\"."+deprecated)
- flag.StringVar(&theConfig.GitCommand, "git-command", theConfig.GitCommand,
- "Path to git or gitolite-shell executable. Each authenticated request will execute this program with a single argument, \"http-backend\"."+deprecated)
- flag.StringVar(&theConfig.RepoRoot, "repo-root", theConfig.RepoRoot,
- "Path to git repositories."+deprecated)
- flag.StringVar(&theConfig.GitoliteHome, "gitolite-home", theConfig.GitoliteHome,
- "Value for GITOLITE_HTTP_HOME environment variable. If not empty, GL_BYPASS_ACCESS_CHECKS=1 will also be set."+deprecated)
+ logger := log.New()
+ log.SetFormatter(&log.JSONFormatter{
+ TimestampFormat: "2006-01-02T15:04:05.000000000Z07:00",
+ })
- cfgPath := flag.String("config", defaultCfgPath, "Configuration file `path`.")
- dumpConfig := flag.Bool("dump-config", false, "write current configuration to stdout and exit (useful for migrating from command line flags to config file)")
- getVersion := flag.Bool("version", false, "print version information and exit.")
+ flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
+ loader := config.NewLoader(os.Stdin, logger)
+ loader.SetupFlags(flags)
- flag.StringVar(&theConfig.ManagementToken, "management-token", theConfig.ManagementToken,
- "Authorization token to be included in all health check requests.")
+ dumpConfig := flags.Bool("dump-config", false, "write current configuration to stdout and exit (useful for migrating from command line flags to config file)")
+ getVersion := flags.Bool("version", false, "print version information and exit.")
- flag.Usage = usage
- flag.Parse()
+ args := loader.MungeLegacyConfigArgs(logger, os.Args[1:], "-legacy-git-httpd-config")
+ flags.Parse(args)
- // Print version information if requested
if *getVersion {
fmt.Printf("arv-git-httpd %s\n", version)
return
}
- err := config.LoadFile(theConfig, *cfgPath)
+ cfg, err := loader.Load()
if err != nil {
- h := os.Getenv("ARVADOS_API_HOST")
- if h == "" || !os.IsNotExist(err) || *cfgPath != defaultCfgPath {
- log.Fatal(err)
- }
- log.Print("DEPRECATED: No config file found, but ARVADOS_API_HOST environment variable is set. Please use a config file instead.")
- theConfig.Client.APIHost = h
- if regexp.MustCompile("^(?i:1|yes|true)$").MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")) {
- theConfig.Client.Insecure = true
- }
- if j, err := json.MarshalIndent(theConfig, "", " "); err == nil {
- log.Print("Current configuration:\n", string(j))
- }
+ log.Fatal(err)
+ }
+
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ log.Fatal(err)
}
if *dumpConfig {
- log.Fatal(config.DumpAndExit(theConfig))
+ out, err := yaml.Marshal(cfg)
+ if err != nil {
+ log.Fatal(err)
+ }
+ _, err = os.Stdout.Write(out)
+ if err != nil {
+ log.Fatal(err)
+ }
+ return
}
- srv := &server{}
+ srv := &server{cluster: cluster}
if err := srv.Start(); err != nil {
log.Fatal(err)
}
}
log.Printf("arv-git-httpd %s started", version)
log.Println("Listening at", srv.Addr)
- log.Println("Repository root", theConfig.RepoRoot)
+ log.Println("Repository root", cluster.Git.Repositories)
if err := srv.Wait(); err != nil {
log.Fatal(err)
}
import (
"net/http"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/health"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
)
type server struct {
httpserver.Server
+ cluster *arvados.Cluster
}
func (srv *server) Start() error {
mux := http.NewServeMux()
- mux.Handle("/", &authHandler{handler: newGitHandler()})
+ mux.Handle("/", &authHandler{handler: newGitHandler(srv.cluster), cluster: srv.cluster})
mux.Handle("/_health/", &health.Handler{
- Token: theConfig.ManagementToken,
+ Token: srv.cluster.ManagementToken,
Prefix: "/_health/",
})
+
+ var listen arvados.URL
+ for listen = range srv.cluster.Services.GitHTTP.InternalURLs {
+ break
+ }
+
srv.Handler = mux
- srv.Addr = theConfig.Listen
+ srv.Addr = listen.Host
return srv.Server.Start()
}
+++ /dev/null
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-// arvados-git-httpd provides authenticated access to Arvados-hosted
-// git repositories.
-//
-// See http://doc.arvados.org/install/install-arv-git-httpd.html.
-package main
-
-import (
- "flag"
- "fmt"
- "os"
-
- "github.com/ghodss/yaml"
-)
-
-func usage() {
- c := defaultConfig()
- c.Client.APIHost = "zzzzz.arvadosapi.com:443"
- exampleConfigFile, err := yaml.Marshal(c)
- if err != nil {
- panic(err)
- }
- fmt.Fprintf(os.Stderr, `
-
-arvados-git-httpd provides authenticated access to Arvados-hosted git
-repositories.
-
-See http://doc.arvados.org/install/install-arv-git-httpd.html.
-
-Usage: arvados-git-httpd [-config path/to/arvados/git-httpd.yml]
-
-Options:
-`)
- flag.PrintDefaults()
- fmt.Fprintf(os.Stderr, `
-Example config file:
-
-%s
-
-Client.APIHost:
-
- Address (or address:port) of the Arvados API endpoint.
-
-Client.AuthToken:
-
- Unused. Normally empty, or omitted entirely.
-
-Client.Insecure:
-
- True if your Arvados API endpoint uses an unverifiable SSL/TLS
- certificate.
-
-GitCommand:
-
- Path to git or gitolite-shell executable. Each authenticated
- request will execute this program with the single argument
- "http-backend".
-
-GitoliteHome:
-
- Path to Gitolite's home directory. If a non-empty path is given,
- the CGI environment will be set up to support the use of
- gitolite-shell as a GitCommand: for example, if GitoliteHome is
- "/gh", then the CGI environment will have GITOLITE_HTTP_HOME=/gh,
- PATH=$PATH:/gh/bin, and GL_BYPASS_ACCESS_CHECKS=1.
-
-Listen:
-
- Local port to listen on. Can be "address:port" or ":port", where
- "address" is a host IP address or name and "port" is a port number
- or name.
-
-RepoRoot:
-
- Path to git repositories.
-
-`, exampleConfigFile)
-}
@create_time.time()
@catch_exceptions
def create(self, inode_parent, name, mode, flags, ctx=None):
- name = name.decode()
+ name = name.decode(encoding=self.inodes.encoding)
_logger.debug("arv-mount create: parent_inode %i '%s' %o", inode_parent, name, mode)
p = self._check_writable(inode_parent)
@mkdir_time.time()
@catch_exceptions
def mkdir(self, inode_parent, name, mode, ctx=None):
- name = name.decode()
+ name = name.decode(encoding=self.inodes.encoding)
_logger.debug("arv-mount mkdir: parent_inode %i '%s' %o", inode_parent, name, mode)
p = self._check_writable(inode_parent)
@unlink_time.time()
@catch_exceptions
def unlink(self, inode_parent, name, ctx=None):
+ name = name.decode(encoding=self.inodes.encoding)
_logger.debug("arv-mount unlink: parent_inode %i '%s'", inode_parent, name)
p = self._check_writable(inode_parent)
- p.unlink(name.decode())
+ p.unlink(name)
@rmdir_time.time()
@catch_exceptions
def rmdir(self, inode_parent, name, ctx=None):
+ name = name.decode(encoding=self.inodes.encoding)
_logger.debug("arv-mount rmdir: parent_inode %i '%s'", inode_parent, name)
p = self._check_writable(inode_parent)
- p.rmdir(name.decode())
+ p.rmdir(name)
@rename_time.time()
@catch_exceptions
def rename(self, inode_parent_old, name_old, inode_parent_new, name_new, ctx=None):
+ name_old = name_old.decode(encoding=self.inodes.encoding)
+ name_new = name_new.decode(encoding=self.inodes.encoding)
_logger.debug("arv-mount rename: old_parent_inode %i '%s' new_parent_inode %i '%s'", inode_parent_old, name_old, inode_parent_new, name_new)
src = self._check_writable(inode_parent_old)
dest = self._check_writable(inode_parent_new)
- dest.rename(name_old.decode(), name_new.decode(), src)
+ dest.rename(name_old, name_new, src)
@flush_time.time()
@catch_exceptions
const metricsUpdateInterval = time.Second / 10
type cache struct {
- TTL arvados.Duration
- UUIDTTL arvados.Duration
- MaxCollectionEntries int
- MaxCollectionBytes int64
- MaxPermissionEntries int
- MaxUUIDEntries int
-
+ config *arvados.WebDAVCacheConfig
registry *prometheus.Registry
metrics cacheMetrics
pdhs *lru.TwoQueueCache
func (c *cache) setup() {
var err error
- c.pdhs, err = lru.New2Q(c.MaxUUIDEntries)
+ c.pdhs, err = lru.New2Q(c.config.MaxUUIDEntries)
if err != nil {
panic(err)
}
- c.collections, err = lru.New2Q(c.MaxCollectionEntries)
+ c.collections, err = lru.New2Q(c.config.MaxCollectionEntries)
if err != nil {
panic(err)
}
- c.permissions, err = lru.New2Q(c.MaxPermissionEntries)
+ c.permissions, err = lru.New2Q(c.config.MaxPermissionEntries)
if err != nil {
panic(err)
}
})
if err == nil {
c.collections.Add(client.AuthToken+"\000"+coll.PortableDataHash, &cachedCollection{
- expire: time.Now().Add(time.Duration(c.TTL)),
+ expire: time.Now().Add(time.Duration(c.config.TTL)),
collection: &updated,
})
}
}
if current.PortableDataHash == pdh {
c.permissions.Add(permKey, &cachedPermission{
- expire: time.Now().Add(time.Duration(c.TTL)),
+ expire: time.Now().Add(time.Duration(c.config.TTL)),
})
if pdh != targetID {
c.pdhs.Add(targetID, &cachedPDH{
- expire: time.Now().Add(time.Duration(c.UUIDTTL)),
+ expire: time.Now().Add(time.Duration(c.config.UUIDTTL)),
pdh: pdh,
})
}
if err != nil {
return nil, err
}
- exp := time.Now().Add(time.Duration(c.TTL))
+ exp := time.Now().Add(time.Duration(c.config.TTL))
c.permissions.Add(permKey, &cachedPermission{
expire: exp,
})
c.pdhs.Add(targetID, &cachedPDH{
- expire: time.Now().Add(time.Duration(c.UUIDTTL)),
+ expire: time.Now().Add(time.Duration(c.config.UUIDTTL)),
pdh: collection.PortableDataHash,
})
c.collections.Add(arv.ApiToken+"\000"+collection.PortableDataHash, &cachedCollection{
expire: exp,
collection: collection,
})
- if int64(len(collection.ManifestText)) > c.MaxCollectionBytes/int64(c.MaxCollectionEntries) {
+ if int64(len(collection.ManifestText)) > c.config.MaxCollectionBytes/int64(c.config.MaxCollectionEntries) {
go c.pruneCollections()
}
return collection, nil
}
}
for i, k := range keys {
- if size <= c.MaxCollectionBytes {
+ if size <= c.config.MaxCollectionBytes {
break
}
if expired[i] {
arv, err := arvadosclient.MakeArvadosClient()
c.Assert(err, check.Equals, nil)
- cache := DefaultConfig().Cache
+ cache := newConfig(s.Config).Cache
cache.registry = prometheus.NewRegistry()
// Hit the same collection 5 times using the same token. Only
arv, err := arvadosclient.MakeArvadosClient()
c.Assert(err, check.Equals, nil)
- cache := DefaultConfig().Cache
+ cache := newConfig(s.Config).Cache
cache.registry = prometheus.NewRegistry()
for _, forceReload := range []bool{false, true, false, true} {
arv, err := arvadosclient.MakeArvadosClient()
c.Assert(err, check.Equals, nil)
- cache := DefaultConfig().Cache
+ cache := newConfig(s.Config).Cache
cache.registry = prometheus.NewRegistry()
for _, forceReload := range []bool{false, true, false, true} {
}
func (s *IntegrationSuite) testCadaver(c *check.C, password string, pathFunc func(arvados.Collection) (string, string, string), skip func(string) bool) {
- s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+ s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
testdata := []byte("the human tragedy consists in the necessity of living with the consequences of actions performed under the pressure of compulsions we do not understand")
// unauthenticated request, which it only does in
// AttachmentOnlyHost, TrustAllContent, and
// per-collection vhost cases.
- s.testServer.Config.AttachmentOnlyHost = s.testServer.Addr
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = s.testServer.Addr
cmd.Env = append(os.Environ(), "HOME="+tempdir)
f, err := os.OpenFile(filepath.Join(tempdir, ".netrc"), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0600)
//
// Configuration
//
-// The default configuration file location is
-// /etc/arvados/keep-web/keep-web.yml.
+// The default cluster configuration file location is
+// /etc/arvados/config.yml.
//
// Example configuration file
//
-// Client:
-// APIHost: "zzzzz.arvadosapi.com:443"
-// AuthToken: ""
-// Insecure: false
-// Listen: :1234
-// AnonymousTokens:
-// - xxxxxxxxxxxxxxxxxxxx
-// AttachmentOnlyHost: ""
-// TrustAllContent: false
+// Clusters:
+// zzzzz:
+// SystemRootToken: ""
+// Services:
+// Controller:
+// ExternalURL: "https://example.com"
+// Insecure: false
+// WebDAV:
+// InternalURLs:
+// "http://:1234/": {}
+// WebDAVDownload:
+// InternalURLs:
+// "http://:1234/": {}
+// ExternalURL: "https://download.example.com/"
+// Users:
+// AnonymousUserToken: "xxxxxxxxxxxxxxxxxxxx"
+// Collections:
+// TrustAllContent: false
//
// Starting the server
//
// Start a server using the default config file
-// /etc/arvados/keep-web/keep-web.yml:
+// /etc/arvados/config.yml:
//
// keep-web
//
-// Start a server using the config file /path/to/keep-web.yml:
+// Start a server using the config file /path/to/config.yml:
//
-// keep-web -config /path/to/keep-web.yml
+// keep-web -config /path/to/config.yml
//
// Proxy configuration
//
//
// Anonymous downloads
//
-// The "AnonymousTokens" configuration entry is an array of tokens to
-// use when processing anonymous requests, i.e., whenever a web client
+// The "Users.AnonymousUserToken" configuration entry is used
+// when processing anonymous requests, i.e., whenever a web client
// does not supply its own Arvados API token via path, query string,
// cookie, or request header.
//
-// "AnonymousTokens":["xxxxxxxxxxxxxxxxxxxxxxx"]
+// Clusters:
+// zzzzz:
+// Users:
+// AnonymousUserToken: "xxxxxxxxxxxxxxxxxxxxxxx"
//
// See http://doc.arvados.org/install/install-keep-web.html for examples.
//
// only when the designated origin matches exactly the Host header
// provided by the client or downstream proxy.
//
-// "AttachmentOnlyHost":"domain.example:9999"
+// Clusters:
+// zzzzz:
+// Services:
+// WebDAVDownload:
+// ExternalURL: "https://domain.example:9999"
//
// Trust All Content mode
//
//
// In such cases you can enable trust-all-content mode.
//
-// "TrustAllContent":true
+// Clusters:
+// zzzzz:
+// Collections:
+// TrustAllContent: true
//
// When TrustAllContent is enabled, the only effect of the
-// AttachmentOnlyHost flag is to add a "Content-Disposition:
+// Attachment-Only host setting is to add a "Content-Disposition:
// attachment" header.
//
-// "AttachmentOnlyHost":"domain.example:9999",
-// "TrustAllContent":true
+// Clusters:
+// zzzzz:
+// Services:
+// WebDAVDownload:
+// ExternalURL: "https://domain.example:9999"
+// Collections:
+// TrustAllContent: true
//
// Depending on your site configuration, you might also want to enable
// the "trust all content" setting in Workbench. Normally, Workbench
h.clientPool = arvadosclient.MakeClientPool()
keepclient.RefreshServiceDiscoveryOnSIGHUP()
+ keepclient.DefaultBlockCache.MaxBlocks = h.Config.cluster.Collections.WebDAVCache.MaxBlockEntries
h.healthHandler = &health.Handler{
- Token: h.Config.ManagementToken,
+ Token: h.Config.cluster.ManagementToken,
Prefix: "/_health/",
}
var pathToken bool
var attachment bool
var useSiteFS bool
- credentialsOK := h.Config.TrustAllContent
+ credentialsOK := h.Config.cluster.Collections.TrustAllContent
- if r.Host != "" && r.Host == h.Config.AttachmentOnlyHost {
+ if r.Host != "" && r.Host == h.Config.cluster.Services.WebDAVDownload.ExternalURL.Host {
credentialsOK = true
attachment = true
} else if r.FormValue("disposition") == "attachment" {
}
if tokens == nil {
- tokens = append(reqTokens, h.Config.AnonymousTokens...)
+ tokens = append(reqTokens, h.Config.cluster.Users.AnonymousUserToken)
}
if len(targetPath) > 0 && targetPath[0] == "_" {
"regexp"
"strings"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
"git.curoverse.com/arvados.git/sdk/go/auth"
+ "git.curoverse.com/arvados.git/sdk/go/keepclient"
check "gopkg.in/check.v1"
)
var _ = check.Suite(&UnitSuite{})
-type UnitSuite struct{}
+type UnitSuite struct {
+ Config *arvados.Config
+}
+
+func (s *UnitSuite) SetUpTest(c *check.C) {
+ ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
+ ldr.Path = "-"
+ cfg, err := ldr.Load()
+ c.Assert(err, check.IsNil)
+ s.Config = cfg
+}
+
+func (s *UnitSuite) TestKeepClientBlockCache(c *check.C) {
+ cfg := newConfig(s.Config)
+ cfg.cluster.Collections.WebDAVCache.MaxBlockEntries = 42
+ h := handler{Config: cfg}
+ c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), cfg.cluster.Collections.WebDAVCache.MaxBlockEntries)
+ u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
+ req := &http.Request{
+ Method: "GET",
+ Host: u.Host,
+ URL: u,
+ RequestURI: u.RequestURI(),
+ }
+ resp := httptest.NewRecorder()
+ h.ServeHTTP(resp, req)
+ c.Check(resp.Code, check.Equals, http.StatusOK)
+ c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, cfg.cluster.Collections.WebDAVCache.MaxBlockEntries)
+}
func (s *UnitSuite) TestCORSPreflight(c *check.C) {
- h := handler{Config: DefaultConfig()}
+ h := handler{Config: newConfig(s.Config)}
u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/foo")
req := &http.Request{
Method: "OPTIONS",
RequestURI: u.RequestURI(),
}
resp := httptest.NewRecorder()
- cfg := DefaultConfig()
- cfg.AnonymousTokens = []string{arvadostest.AnonymousToken}
+ cfg := newConfig(s.Config)
+ cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
h := handler{Config: cfg}
h.ServeHTTP(resp, req)
c.Check(resp.Code, check.Equals, http.StatusNotFound)
}
func (s *IntegrationSuite) TestVhostRedirectQueryTokenSiteFS(c *check.C) {
- s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
resp := s.testVhostRedirectTokenToCookie(c, "GET",
"download.example.com/by_id/"+arvadostest.FooCollection+"/foo",
"?api_token="+arvadostest.ActiveToken,
}
func (s *IntegrationSuite) TestPastCollectionVersionFileAccess(c *check.C) {
- s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
resp := s.testVhostRedirectTokenToCookie(c, "GET",
"download.example.com/c="+arvadostest.WazVersion1Collection+"/waz",
"?api_token="+arvadostest.ActiveToken,
}
func (s *IntegrationSuite) TestVhostRedirectQueryTokenTrustAllContent(c *check.C) {
- s.testServer.Config.TrustAllContent = true
+ s.testServer.Config.cluster.Collections.TrustAllContent = true
s.testVhostRedirectTokenToCookie(c, "GET",
"example.com/c="+arvadostest.FooCollection+"/foo",
"?api_token="+arvadostest.ActiveToken,
}
func (s *IntegrationSuite) TestVhostRedirectQueryTokenAttachmentOnlyHost(c *check.C) {
- s.testServer.Config.AttachmentOnlyHost = "example.com:1234"
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com:1234"
s.testVhostRedirectTokenToCookie(c, "GET",
"example.com/c="+arvadostest.FooCollection+"/foo",
}
func (s *IntegrationSuite) TestAnonymousTokenOK(c *check.C) {
- s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+ s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
s.testVhostRedirectTokenToCookie(c, "GET",
"example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
"",
}
func (s *IntegrationSuite) TestAnonymousTokenError(c *check.C) {
- s.testServer.Config.AnonymousTokens = []string{"anonymousTokenConfiguredButInvalid"}
+ s.testServer.Config.cluster.Users.AnonymousUserToken = "anonymousTokenConfiguredButInvalid"
s.testVhostRedirectTokenToCookie(c, "GET",
"example.com/c="+arvadostest.HelloWorldCollection+"/Hello%20world.txt",
"",
}
func (s *IntegrationSuite) TestSpecialCharsInPath(c *check.C) {
- s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
client := s.testServer.Config.Client
client.AuthToken = arvadostest.ActiveToken
}
func (s *IntegrationSuite) TestDirectoryListingWithAnonymousToken(c *check.C) {
- s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+ s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
s.testDirectoryListing(c)
}
func (s *IntegrationSuite) TestDirectoryListingWithNoAnonymousToken(c *check.C) {
- s.testServer.Config.AnonymousTokens = nil
+ s.testServer.Config.cluster.Users.AnonymousUserToken = ""
s.testDirectoryListing(c)
}
func (s *IntegrationSuite) testDirectoryListing(c *check.C) {
- s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "download.example.com"
authHeader := http.Header{
"Authorization": {"OAuth2 " + arvadostest.ActiveToken},
}
cutDirs: 2,
},
} {
- c.Logf("HTML: %q => %q", trial.uri, trial.expect)
+ comment := check.Commentf("HTML: %q => %q", trial.uri, trial.expect)
resp := httptest.NewRecorder()
u := mustParseURL("//" + trial.uri)
req := &http.Request{
s.testServer.Handler.ServeHTTP(resp, req)
}
if trial.redirect != "" {
- c.Check(req.URL.Path, check.Equals, trial.redirect)
+ c.Check(req.URL.Path, check.Equals, trial.redirect, comment)
}
if trial.expect == nil {
- c.Check(resp.Code, check.Equals, http.StatusNotFound)
+ c.Check(resp.Code, check.Equals, http.StatusNotFound, comment)
} else {
- c.Check(resp.Code, check.Equals, http.StatusOK)
+ c.Check(resp.Code, check.Equals, http.StatusOK, comment)
for _, e := range trial.expect {
- c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*href="./`+e+`".*`, comment)
}
- c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`, comment)
}
- c.Logf("WebDAV: %q => %q", trial.uri, trial.expect)
+ comment = check.Commentf("WebDAV: %q => %q", trial.uri, trial.expect)
req = &http.Request{
Method: "OPTIONS",
Host: u.Host,
resp = httptest.NewRecorder()
s.testServer.Handler.ServeHTTP(resp, req)
if trial.expect == nil {
- c.Check(resp.Code, check.Equals, http.StatusNotFound)
+ c.Check(resp.Code, check.Equals, http.StatusNotFound, comment)
} else {
- c.Check(resp.Code, check.Equals, http.StatusOK)
+ c.Check(resp.Code, check.Equals, http.StatusOK, comment)
}
req = &http.Request{
resp = httptest.NewRecorder()
s.testServer.Handler.ServeHTTP(resp, req)
if trial.expect == nil {
- c.Check(resp.Code, check.Equals, http.StatusNotFound)
+ c.Check(resp.Code, check.Equals, http.StatusNotFound, comment)
} else {
- c.Check(resp.Code, check.Equals, http.StatusMultiStatus)
+ c.Check(resp.Code, check.Equals, http.StatusMultiStatus, comment)
for _, e := range trial.expect {
- c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+filepath.Join(u.Path, e)+`</D:href>.*`)
+ c.Check(resp.Body.String(), check.Matches, `(?ms).*<D:href>`+filepath.Join(u.Path, e)+`</D:href>.*`, comment)
}
}
}
var updated arvados.Collection
for _, fnm := range []string{"foo.txt", "bar.txt"} {
- s.testServer.Config.AttachmentOnlyHost = "example.com"
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = "example.com"
u, _ := url.Parse("http://example.com/c=" + newCollection.UUID + "/" + fnm)
req := &http.Request{
Method: "DELETE",
}
func (s *IntegrationSuite) TestHealthCheckPing(c *check.C) {
- s.testServer.Config.ManagementToken = arvadostest.ManagementToken
+ s.testServer.Config.cluster.ManagementToken = arvadostest.ManagementToken
authHeader := http.Header{
"Authorization": {"Bearer " + arvadostest.ManagementToken},
}
Description=Arvados Keep web gateway
Documentation=https://doc.arvados.org/
After=network.target
-AssertPathExists=/etc/arvados/keep-web/keep-web.yml
# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
StartLimitInterval=0
"flag"
"fmt"
"os"
- "time"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
- "git.curoverse.com/arvados.git/sdk/go/config"
"github.com/coreos/go-systemd/daemon"
+ "github.com/ghodss/yaml"
log "github.com/sirupsen/logrus"
)
var (
- defaultConfigPath = "/etc/arvados/keep-web/keep-web.yml"
- version = "dev"
+ version = "dev"
)
// Config specifies server configuration.
type Config struct {
- Client arvados.Client
-
- Listen string
-
- AnonymousTokens []string
- AttachmentOnlyHost string
- TrustAllContent bool
-
- Cache cache
-
- // Hack to support old command line flag, which is a bool
- // meaning "get actual token from environment".
- deprecatedAllowAnonymous bool
-
- //Authorization token to be included in all health check requests.
- ManagementToken string
+ Client arvados.Client
+ Cache cache
+ cluster *arvados.Cluster
}
-// DefaultConfig returns the default configuration.
-func DefaultConfig() *Config {
- return &Config{
- Listen: ":80",
- Cache: cache{
- TTL: arvados.Duration(5 * time.Minute),
- UUIDTTL: arvados.Duration(5 * time.Second),
- MaxCollectionEntries: 1000,
- MaxCollectionBytes: 100000000,
- MaxPermissionEntries: 1000,
- MaxUUIDEntries: 1000,
- },
+// newConfig wraps a loaded cluster-config tree in the keep-web Config:
+// it selects the cluster via GetCluster("") — calling log.Fatal if no
+// cluster can be selected — and points the collection cache at that
+// cluster's Collections.WebDAVCache settings.
+func newConfig(arvCfg *arvados.Config) *Config {
+ cfg := Config{}
+ var cls *arvados.Cluster
+ var err error
+ if cls, err = arvCfg.GetCluster(""); err != nil {
+ log.Fatal(err)
 }
+ cfg.cluster = cls
+ cfg.Cache.config = &cfg.cluster.Collections.WebDAVCache
+ return &cfg
}
func init() {
})
}
-func main() {
- cfg := DefaultConfig()
-
- var configPath string
- deprecated := " (DEPRECATED -- use config file instead)"
- flag.StringVar(&configPath, "config", defaultConfigPath,
- "`path` to JSON or YAML configuration file")
- flag.StringVar(&cfg.Listen, "listen", "",
- "address:port or :port to listen on"+deprecated)
- flag.BoolVar(&cfg.deprecatedAllowAnonymous, "allow-anonymous", false,
- "Load an anonymous token from the ARVADOS_API_TOKEN environment variable"+deprecated)
- flag.StringVar(&cfg.AttachmentOnlyHost, "attachment-only-host", "",
- "Only serve attachments at the given `host:port`"+deprecated)
- flag.BoolVar(&cfg.TrustAllContent, "trust-all-content", false,
- "Serve non-public content from a single origin. Dangerous: read docs before using!"+deprecated)
- flag.StringVar(&cfg.ManagementToken, "management-token", "",
- "Authorization token to be included in all health check requests.")
-
- dumpConfig := flag.Bool("dump-config", false,
+// configure parses command-line flags (migrating any legacy keep-web
+// config arguments via MungeLegacyConfigArgs), loads the cluster
+// configuration, and returns the resulting keep-web Config. It
+// returns nil when the process should exit without serving (after
+// -version or -dump-config output) and calls log.Fatal on a config
+// load error.
+func configure(logger log.FieldLogger, args []string) *Config {
+ flags := flag.NewFlagSet(args[0], flag.ExitOnError)
+
+ loader := config.NewLoader(os.Stdin, logger)
+ loader.SetupFlags(flags)
+
+ dumpConfig := flags.Bool("dump-config", false,
 "write current configuration to stdout and exit")
- getVersion := flag.Bool("version", false,
+ getVersion := flags.Bool("version", false,
 "print version information and exit.")
- flag.Usage = usage
- flag.Parse()
+
+ args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepweb-config")
+ flags.Parse(args)
 // Print version information if requested
 if *getVersion {
 fmt.Printf("keep-web %s\n", version)
- return
+ return nil
 }
- if err := config.LoadFile(cfg, configPath); err != nil {
- if h := os.Getenv("ARVADOS_API_HOST"); h != "" && configPath == defaultConfigPath {
- log.Printf("DEPRECATED: Using ARVADOS_API_HOST environment variable. Use config file instead.")
- cfg.Client.APIHost = h
- } else {
+ arvCfg, err := loader.Load()
+ if err != nil {
+ log.Fatal(err)
+ }
+ cfg := newConfig(arvCfg)
+
+ if *dumpConfig {
+ // NOTE(review): Config's cluster field is unexported, so
+ // yaml.Marshal(cfg) omits the cluster section from -dump-config
+ // output — confirm whether marshalling arvCfg was intended.
+ out, err := yaml.Marshal(cfg)
+ if err != nil {
 log.Fatal(err)
 }
+ _, err = os.Stdout.Write(out)
+ if err != nil {
+ log.Fatal(err)
+ }
+ return nil
 }
- if cfg.deprecatedAllowAnonymous {
- log.Printf("DEPRECATED: Using -allow-anonymous command line flag with ARVADOS_API_TOKEN environment variable. Use config file instead.")
- cfg.AnonymousTokens = []string{os.Getenv("ARVADOS_API_TOKEN")}
- }
+ return cfg
+}
- if *dumpConfig {
- log.Fatal(config.DumpAndExit(cfg))
+func main() {
+ logger := log.New()
+
+ cfg := configure(logger, os.Args)
+ if cfg == nil {
+ return
}
log.Printf("keep-web %s started", version)
- os.Setenv("ARVADOS_API_HOST", cfg.Client.APIHost)
+ os.Setenv("ARVADOS_API_HOST", cfg.cluster.Services.Controller.ExternalURL.Host)
srv := &server{Config: cfg}
if err := srv.Start(); err != nil {
log.Fatal(err)
"context"
"net/http"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/ctxlog"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
"github.com/prometheus/client_golang/prometheus"
h.Config.Cache.registry = reg
ctx := ctxlog.Context(context.Background(), logrus.StandardLogger())
mh := httpserver.Instrument(reg, nil, httpserver.HandlerWithContext(ctx, httpserver.AddRequestIDs(httpserver.LogRequests(h))))
- h.MetricsAPI = mh.ServeAPI(h.Config.ManagementToken, http.NotFoundHandler())
+ h.MetricsAPI = mh.ServeAPI(h.Config.cluster.ManagementToken, http.NotFoundHandler())
srv.Handler = mh
- srv.Addr = srv.Config.Listen
+ var listen arvados.URL
+ for listen = range srv.Config.cluster.Services.WebDAV.InternalURLs {
+ break
+ }
+ if len(srv.Config.cluster.Services.WebDAV.InternalURLs) > 1 {
+ logrus.Warn("Services.WebDAV.InternalURLs has more than one key; picked: ", listen)
+ }
+ srv.Addr = listen.Host
return srv.Server.Start()
}
package main
import (
+ "bytes"
"crypto/md5"
"encoding/json"
"fmt"
"strings"
"testing"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
}
func (s *IntegrationSuite) Test200(c *check.C) {
- s.testServer.Config.AnonymousTokens = []string{arvadostest.AnonymousToken}
+ s.testServer.Config.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
for _, spec := range []curlCase{
// My collection
{
}
func (s *IntegrationSuite) TestMetrics(c *check.C) {
- s.testServer.Config.AttachmentOnlyHost = s.testServer.Addr
+ s.testServer.Config.cluster.Services.WebDAVDownload.ExternalURL.Host = s.testServer.Addr
origin := "http://" + s.testServer.Addr
req, _ := http.NewRequest("GET", origin+"/notfound", nil)
_, err := http.DefaultClient.Do(req)
func (s *IntegrationSuite) SetUpTest(c *check.C) {
 arvadostest.ResetEnv()
- cfg := DefaultConfig()
+ // Build a keep-web Config from a minimal in-memory cluster config
+ // ("zzzzz" with all defaults).
+ ldr := config.NewLoader(bytes.NewBufferString("Clusters: {zzzzz: {}}"), nil)
+ ldr.Path = "-"
+ arvCfg, err := ldr.Load()
+ c.Check(err, check.IsNil)
+ cfg := newConfig(arvCfg)
+ // NOTE(review): err was already checked two lines up and is not
+ // reassigned by newConfig, so this Assert is redundant — consider
+ // making the Check above an Assert instead.
+ c.Assert(err, check.IsNil)
 cfg.Client = arvados.Client{
 APIHost: testAPIHost,
 Insecure: true,
 }
- cfg.Listen = "127.0.0.1:0"
- cfg.ManagementToken = arvadostest.ManagementToken
+ // Listen on an ephemeral localhost port for both the WebDAV and
+ // WebDAVDownload services.
+ listen := "127.0.0.1:0"
+ cfg.cluster.Services.WebDAV.InternalURLs[arvados.URL{Host: listen}] = arvados.ServiceInstance{}
+ cfg.cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: listen}] = arvados.ServiceInstance{}
+ cfg.cluster.ManagementToken = arvadostest.ManagementToken
+ cfg.cluster.Users.AnonymousUserToken = arvadostest.AnonymousToken
 s.testServer = &server{Config: cfg}
- err := s.testServer.Start()
+ err = s.testServer.Start()
 c.Assert(err, check.Equals, nil)
}
)
func (s *UnitSuite) TestStatus(c *check.C) {
- h := handler{Config: DefaultConfig()}
+ h := handler{Config: newConfig(s.Config)}
u, _ := url.Parse("http://keep-web.example/status.json")
req := &http.Request{
Method: "GET",
+++ /dev/null
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
- "encoding/json"
- "flag"
- "fmt"
- "os"
-)
-
-func usage() {
- c := DefaultConfig()
- c.AnonymousTokens = []string{"xxxxxxxxxxxxxxxxxxxxxxx"}
- c.Client.APIHost = "zzzzz.arvadosapi.com:443"
- exampleConfigFile, err := json.MarshalIndent(c, " ", " ")
- if err != nil {
- panic(err)
- }
- fmt.Fprintf(os.Stderr, `
-
-Keep-web provides read-only HTTP access to files stored in Keep; see
-https://godoc.org/github.com/curoverse/arvados/services/keep-web and
-http://doc.arvados.org/install/install-keep-web.html
-
-Usage: keep-web -config path/to/keep-web.yml
-
-Options:
-`)
- flag.PrintDefaults()
- fmt.Fprintf(os.Stderr, `
-Example config file:
- %s
-
-Client.APIHost:
-
- Address (or address:port) of the Arvados API endpoint.
-
-Client.AuthToken:
-
- Unused. Normally empty, or omitted entirely.
-
-Client.Insecure:
-
- True if your Arvados API endpoint uses an unverifiable SSL/TLS
- certificate.
-
-Listen:
-
- Local port to listen on. Can be "address", "address:port", or
- ":port", where "address" is a host IP address or name and "port"
- is a port number or name.
-
-AnonymousTokens:
-
- Array of tokens to try when a client does not provide a token.
-
-AttachmentOnlyHost:
-
- Accept credentials, and add "Content-Disposition: attachment"
- response headers, for requests at this hostname:port.
-
- This prohibits inline display, which makes it possible to serve
- untrusted and non-public content from a single origin, i.e.,
- without wildcard DNS or SSL.
-
-TrustAllContent:
-
- Serve non-public content from a single origin. Dangerous: read
- docs before using!
-
-Cache.TTL:
-
- Maximum time to cache manifests and permission checks.
-
-Cache.UUIDTTL:
-
- Maximum time to cache collection state.
-
-Cache.MaxCollectionEntries:
-
- Maximum number of collection cache entries.
-
-Cache.MaxCollectionBytes:
-
- Approximate memory limit for collection cache.
-
-Cache.MaxPermissionEntries:
-
- Maximum number of permission cache entries.
-
-Cache.MaxUUIDEntries:
-
- Maximum number of UUID cache entries.
-
-`, exampleConfigFile)
-}
"syscall"
"time"
+ "git.curoverse.com/arvados.git/lib/config"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/config"
"git.curoverse.com/arvados.git/sdk/go/health"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
var version = "dev"
-type Config struct {
- Client arvados.Client
- Listen string
- DisableGet bool
- DisablePut bool
- DefaultReplicas int
- Timeout arvados.Duration
- PIDFile string
- Debug bool
- ManagementToken string
-}
-
-func DefaultConfig() *Config {
- return &Config{
- Listen: ":25107",
- Timeout: arvados.Duration(15 * time.Second),
- }
-}
-
var (
listener net.Listener
router http.Handler
const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
-func main() {
- log.SetFormatter(&log.JSONFormatter{
- TimestampFormat: rfc3339NanoFixed,
- })
+// configure parses flags (migrating legacy keepproxy config arguments
+// via MungeLegacyConfigArgs) and loads the cluster configuration.
+// Returns (nil, nil) after handling -version or -dump-config, telling
+// the caller to exit without serving; otherwise returns the selected
+// cluster or a load/selection error.
+func configure(logger log.FieldLogger, args []string) (*arvados.Cluster, error) {
+ flags := flag.NewFlagSet(args[0], flag.ExitOnError)
- cfg := DefaultConfig()
+ dumpConfig := flags.Bool("dump-config", false, "write current configuration to stdout and exit")
+ getVersion := flags.Bool("version", false, "Print version information and exit.")
- flagset := flag.NewFlagSet("keepproxy", flag.ExitOnError)
- flagset.Usage = usage
+ loader := config.NewLoader(os.Stdin, logger)
+ loader.SetupFlags(flags)
- const deprecated = " (DEPRECATED -- use config file instead)"
- flagset.StringVar(&cfg.Listen, "listen", cfg.Listen, "Local port to listen on."+deprecated)
- flagset.BoolVar(&cfg.DisableGet, "no-get", cfg.DisableGet, "Disable GET operations."+deprecated)
- flagset.BoolVar(&cfg.DisablePut, "no-put", cfg.DisablePut, "Disable PUT operations."+deprecated)
- flagset.IntVar(&cfg.DefaultReplicas, "default-replicas", cfg.DefaultReplicas, "Default number of replicas to write if not specified by the client. If 0, use site default."+deprecated)
- flagset.StringVar(&cfg.PIDFile, "pid", cfg.PIDFile, "Path to write pid file."+deprecated)
- timeoutSeconds := flagset.Int("timeout", int(time.Duration(cfg.Timeout)/time.Second), "Timeout (in seconds) on requests to internal Keep services."+deprecated)
- flagset.StringVar(&cfg.ManagementToken, "management-token", cfg.ManagementToken, "Authorization token to be included in all health check requests.")
-
- var cfgPath string
- const defaultCfgPath = "/etc/arvados/keepproxy/keepproxy.yml"
- flagset.StringVar(&cfgPath, "config", defaultCfgPath, "Configuration file `path`")
- dumpConfig := flagset.Bool("dump-config", false, "write current configuration to stdout and exit")
- getVersion := flagset.Bool("version", false, "Print version information and exit.")
- flagset.Parse(os.Args[1:])
+ args = loader.MungeLegacyConfigArgs(logger, args[1:], "-legacy-keepproxy-config")
+ flags.Parse(args)
 // Print version information if requested
 if *getVersion {
 fmt.Printf("keepproxy %s\n", version)
- return
+ return nil, nil
 }
- err := config.LoadFile(cfg, cfgPath)
+ cfg, err := loader.Load()
 if err != nil {
- h := os.Getenv("ARVADOS_API_HOST")
- t := os.Getenv("ARVADOS_API_TOKEN")
- if h == "" || t == "" || !os.IsNotExist(err) || cfgPath != defaultCfgPath {
- log.Fatal(err)
- }
- log.Print("DEPRECATED: No config file found, but ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables are set. Please use a config file instead.")
- cfg.Client.APIHost = h
- cfg.Client.AuthToken = t
- if regexp.MustCompile("^(?i:1|yes|true)$").MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")) {
- cfg.Client.Insecure = true
+ return nil, err
+ }
+ cluster, err := cfg.GetCluster("")
+ if err != nil {
+ return nil, err
+ }
+
+ if *dumpConfig {
+ out, err := yaml.Marshal(cfg)
+ if err != nil {
+ return nil, err
 }
- if y, err := yaml.Marshal(cfg); err == nil && !*dumpConfig {
- log.Print("Current configuration:\n", string(y))
+ if _, err := os.Stdout.Write(out); err != nil {
+ return nil, err
 }
- cfg.Timeout = arvados.Duration(time.Duration(*timeoutSeconds) * time.Second)
+ return nil, nil
 }
+ return cluster, nil
+}
- if *dumpConfig {
- log.Fatal(config.DumpAndExit(cfg))
+// main sets up JSON logging, runs configure() (which may request an
+// early exit for -version/-dump-config), then hands off to run() and
+// exits fatally on any error.
+func main() {
+ logger := log.New()
+ logger.Formatter = &log.JSONFormatter{
+ TimestampFormat: rfc3339NanoFixed,
+ }
+
+ cluster, err := configure(logger, os.Args)
+ if err != nil {
+ log.Fatal(err)
+ }
+ // nil cluster with nil error means configure already handled the
+ // invocation (-version or -dump-config); exit quietly.
+ if cluster == nil {
+ return
 }
 log.Printf("keepproxy %s started", version)
- arv, err := arvadosclient.New(&cfg.Client)
+ if err := run(logger, cluster); err != nil {
+ log.Fatal(err)
+ }
+
+ log.Println("shutting down")
+}
+
+// run starts keepproxy using the given cluster config: it builds an
+// arvados API client authenticated with cluster.SystemRootToken, a
+// keep client, listens on one of Services.Keepproxy.InternalURLs, and
+// serves HTTP until the listener is closed. Blocking; returns the
+// error from net.Listen or http.Serve.
+func run(logger log.FieldLogger, cluster *arvados.Cluster) error {
+ client, err := arvados.NewClientFromConfig(cluster)
+ if err != nil {
+ return err
+ }
+ client.AuthToken = cluster.SystemRootToken
+
+ arv, err := arvadosclient.New(client)
 if err != nil {
- log.Fatalf("Error setting up arvados client %s", err.Error())
+ return fmt.Errorf("Error setting up arvados client %v", err)
 }
- if cfg.Debug {
+ if cluster.SystemLogs.LogLevel == "debug" {
 keepclient.DebugPrintf = log.Printf
 }
 kc, err := keepclient.MakeKeepClient(arv)
 if err != nil {
- log.Fatalf("Error setting up keep client %s", err.Error())
+ return fmt.Errorf("Error setting up keep client %v", err)
 }
 keepclient.RefreshServiceDiscoveryOnSIGHUP()
- if cfg.PIDFile != "" {
- f, err := os.Create(cfg.PIDFile)
- if err != nil {
- log.Fatal(err)
- }
- defer f.Close()
- err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
- if err != nil {
- log.Fatalf("flock(%s): %s", cfg.PIDFile, err)
- }
- defer os.Remove(cfg.PIDFile)
- err = f.Truncate(0)
- if err != nil {
- log.Fatalf("truncate(%s): %s", cfg.PIDFile, err)
- }
- _, err = fmt.Fprint(f, os.Getpid())
- if err != nil {
- log.Fatalf("write(%s): %s", cfg.PIDFile, err)
- }
- err = f.Sync()
- if err != nil {
- log.Fatalf("sync(%s): %s", cfg.PIDFile, err)
- }
+ if cluster.Collections.DefaultReplication > 0 {
+ kc.Want_replicas = cluster.Collections.DefaultReplication
 }
- if cfg.DefaultReplicas > 0 {
- kc.Want_replicas = cfg.DefaultReplicas
+ // Picks an arbitrary InternalURLs entry (Go map iteration order is
+ // unspecified); assumes a single entry is configured.
+ var listen arvados.URL
+ for listen = range cluster.Services.Keepproxy.InternalURLs {
+ break
 }
- listener, err = net.Listen("tcp", cfg.Listen)
- if err != nil {
- log.Fatalf("listen(%s): %s", cfg.Listen, err)
+ var lErr error
+ listener, lErr = net.Listen("tcp", listen.Host)
+ if lErr != nil {
+ return fmt.Errorf("listen(%s): %v", listen.Host, lErr)
 }
+
 if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
 log.Printf("Error notifying init daemon: %v", err)
 }
- log.Println("Listening at", listener.Addr())
+ log.Println("listening at", listener.Addr())
 // Shut down the server gracefully (by closing the listener)
 // if SIGTERM is received.
 signal.Notify(term, syscall.SIGINT)
 // Start serving requests.
- router = MakeRESTRouter(!cfg.DisableGet, !cfg.DisablePut, kc, time.Duration(cfg.Timeout), cfg.ManagementToken)
- http.Serve(listener, httpserver.AddRequestIDs(httpserver.LogRequests(router)))
-
- log.Println("shutting down")
+ // NOTE(review): the health-check handler's management token is now
+ // cluster.SystemRootToken rather than the old ManagementToken
+ // setting — confirm this is intended.
+ router = MakeRESTRouter(kc, time.Duration(cluster.API.KeepServiceRequestTimeout), cluster.SystemRootToken)
+ return http.Serve(listener, httpserver.AddRequestIDs(httpserver.LogRequests(router)))
}
type ApiTokenCache struct {
// MakeRESTRouter returns an http.Handler that passes GET and PUT
// requests to the appropriate handlers.
-func MakeRESTRouter(enable_get bool, enable_put bool, kc *keepclient.KeepClient, timeout time.Duration, mgmtToken string) http.Handler {
+func MakeRESTRouter(kc *keepclient.KeepClient, timeout time.Duration, mgmtToken string) http.Handler {
rest := mux.NewRouter()
transport := defaultTransport
},
}
- if enable_get {
- rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Get).Methods("GET", "HEAD")
- rest.HandleFunc(`/{locator:[0-9a-f]{32}}`, h.Get).Methods("GET", "HEAD")
+ rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Get).Methods("GET", "HEAD")
+ rest.HandleFunc(`/{locator:[0-9a-f]{32}}`, h.Get).Methods("GET", "HEAD")
- // List all blocks
- rest.HandleFunc(`/index`, h.Index).Methods("GET")
+ // List all blocks
+ rest.HandleFunc(`/index`, h.Index).Methods("GET")
- // List blocks whose hash has the given prefix
- rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, h.Index).Methods("GET")
- }
+ // List blocks whose hash has the given prefix
+ rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, h.Index).Methods("GET")
- if enable_put {
- rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Put).Methods("PUT")
- rest.HandleFunc(`/{locator:[0-9a-f]{32}}`, h.Put).Methods("PUT")
- rest.HandleFunc(`/`, h.Put).Methods("POST")
- rest.HandleFunc(`/{any}`, h.Options).Methods("OPTIONS")
- rest.HandleFunc(`/`, h.Options).Methods("OPTIONS")
- }
+ rest.HandleFunc(`/{locator:[0-9a-f]{32}\+.*}`, h.Put).Methods("PUT")
+ rest.HandleFunc(`/{locator:[0-9a-f]{32}}`, h.Put).Methods("PUT")
+ rest.HandleFunc(`/`, h.Put).Methods("POST")
+ rest.HandleFunc(`/{any}`, h.Options).Methods("OPTIONS")
+ rest.HandleFunc(`/`, h.Options).Methods("OPTIONS")
rest.Handle("/_health/{check}", &health.Handler{
Token: mgmtToken,
Description=Arvados Keep Proxy
Documentation=https://doc.arvados.org/
After=network.target
-AssertPathExists=/etc/arvados/keepproxy/keepproxy.yml
# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
StartLimitInterval=0
"math/rand"
"net/http"
"net/http/httptest"
- "os"
"strings"
"sync"
"testing"
"time"
+ "git.curoverse.com/arvados.git/lib/config"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
+ log "github.com/sirupsen/logrus"
. "gopkg.in/check.v1"
)
arvadostest.StopAPI()
}
-func runProxy(c *C, args []string, bogusClientToken bool) *keepclient.KeepClient {
- args = append([]string{"keepproxy"}, args...)
- os.Args = append(args, "-listen=:0")
+// runProxy starts a keepproxy instance (listening on an ephemeral
+// port) in a background goroutine using a freshly loaded default
+// cluster config, then returns a keepclient pointed at it. With
+// bogusClientToken set, the client uses an invalid API token so
+// callers can test request rejection.
+func runProxy(c *C, bogusClientToken bool) *keepclient.KeepClient {
+ cfg, err := config.NewLoader(nil, nil).Load()
+ c.Assert(err, Equals, nil)
+ cluster, err := cfg.GetCluster("")
+ c.Assert(err, Equals, nil)
+
+ cluster.Services.Keepproxy.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: ":0"}: arvados.ServiceInstance{}}
+
 listener = nil
- go main()
+ go func() {
+ run(log.New(), cluster)
+ // NOTE(review): this defer is registered only after run()
+ // returns (run blocks while serving), so it behaves like a
+ // plain call on goroutine exit; if cleanup should also happen
+ // when run panics, move the defer above the call — confirm.
+ defer closeListener()
+ }()
 waitForListener()
- arv, err := arvadosclient.MakeArvadosClient()
+ client := arvados.NewClientFromEnv()
+ arv, err := arvadosclient.New(client)
 c.Assert(err, Equals, nil)
 if bogusClientToken {
 arv.ApiToken = "bogus-token"
 }
func (s *ServerRequiredSuite) TestResponseViaHeader(c *C) {
- runProxy(c, nil, false)
+ runProxy(c, false)
defer closeListener()
req, err := http.NewRequest("POST",
}
func (s *ServerRequiredSuite) TestLoopDetection(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
sr := map[string]string{
}
func (s *ServerRequiredSuite) TestStorageClassesHeader(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
// Set up fake keepstore to record request headers
}
func (s *ServerRequiredSuite) TestDesiredReplicas(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
content := []byte("TestDesiredReplicas")
}
func (s *ServerRequiredSuite) TestPutWrongContentLength(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
content := []byte("TestPutWrongContentLength")
// fixes the invalid Content-Length header. In order to test
// our server behavior, we have to call the handler directly
// using an httptest.ResponseRecorder.
- rtr := MakeRESTRouter(true, true, kc, 10*time.Second, "")
+ rtr := MakeRESTRouter(kc, 10*time.Second, "")
type testcase struct {
sendLength string
}
func (s *ServerRequiredSuite) TestManyFailedPuts(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
router.(*proxyHandler).timeout = time.Nanosecond
}
func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
}
func (s *ServerRequiredSuite) TestPutAskGetForbidden(c *C) {
- kc := runProxy(c, nil, true)
+ kc := runProxy(c, true)
defer closeListener()
hash := fmt.Sprintf("%x+3", md5.Sum([]byte("bar")))
}
-func (s *ServerRequiredSuite) TestGetDisabled(c *C) {
- kc := runProxy(c, []string{"-no-get"}, false)
- defer closeListener()
-
- hash := fmt.Sprintf("%x", md5.Sum([]byte("baz")))
-
- {
- _, _, err := kc.Ask(hash)
- errNotFound, _ := err.(keepclient.ErrNotFound)
- c.Check(errNotFound, NotNil)
- c.Assert(err, ErrorMatches, `.*HTTP 405.*`)
- c.Log("Ask 1")
- }
-
- {
- hash2, rep, err := kc.PutB([]byte("baz"))
- c.Check(hash2, Matches, fmt.Sprintf(`^%s\+3(\+.+)?$`, hash))
- c.Check(rep, Equals, 2)
- c.Check(err, Equals, nil)
- c.Log("PutB")
- }
-
- {
- blocklen, _, err := kc.Ask(hash)
- errNotFound, _ := err.(keepclient.ErrNotFound)
- c.Check(errNotFound, NotNil)
- c.Assert(err, ErrorMatches, `.*HTTP 405.*`)
- c.Check(blocklen, Equals, int64(0))
- c.Log("Ask 2")
- }
-
- {
- _, blocklen, _, err := kc.Get(hash)
- errNotFound, _ := err.(keepclient.ErrNotFound)
- c.Check(errNotFound, NotNil)
- c.Assert(err, ErrorMatches, `.*HTTP 405.*`)
- c.Check(blocklen, Equals, int64(0))
- c.Log("Get")
- }
-}
-
-func (s *ServerRequiredSuite) TestPutDisabled(c *C) {
- kc := runProxy(c, []string{"-no-put"}, false)
- defer closeListener()
-
- hash2, rep, err := kc.PutB([]byte("quux"))
- c.Check(hash2, Equals, "")
- c.Check(rep, Equals, 0)
- c.Check(err, FitsTypeOf, keepclient.InsufficientReplicasError(errors.New("")))
-}
-
func (s *ServerRequiredSuite) TestCorsHeaders(c *C) {
- runProxy(c, nil, false)
+ runProxy(c, false)
defer closeListener()
{
}
func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) {
- runProxy(c, nil, false)
+ runProxy(c, false)
defer closeListener()
{
// With a valid but non-existing prefix (expect "\n")
// With an invalid prefix (expect error)
func (s *ServerRequiredSuite) TestGetIndex(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
// Put "index-data" blocks
}
func (s *ServerRequiredSuite) TestCollectionSharingToken(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
hash, _, err := kc.PutB([]byte("shareddata"))
c.Check(err, IsNil)
}
func (s *ServerRequiredSuite) TestPutAskGetInvalidToken(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
// Put a test block
}
func (s *ServerRequiredSuite) TestAskGetKeepProxyConnectionError(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
// Point keepproxy at a non-existent keepstore
}
func (s *NoKeepServerSuite) TestAskGetNoKeepServerError(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
}
func (s *ServerRequiredSuite) TestPing(c *C) {
- kc := runProxy(c, nil, false)
+ kc := runProxy(c, false)
defer closeListener()
- rtr := MakeRESTRouter(true, true, kc, 10*time.Second, arvadostest.ManagementToken)
+ rtr := MakeRESTRouter(kc, 10*time.Second, arvadostest.ManagementToken)
req, err := http.NewRequest("GET",
"http://"+listener.Addr().String()+"/_health/ping",
+++ /dev/null
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
- "encoding/json"
- "flag"
- "fmt"
- "os"
-)
-
-func usage() {
- c := DefaultConfig()
- c.Client.APIHost = "zzzzz.arvadosapi.com:443"
- exampleConfigFile, err := json.MarshalIndent(c, " ", " ")
- if err != nil {
- panic(err)
- }
- fmt.Fprintf(os.Stderr, `
-
-Keepproxy forwards GET and PUT requests to keepstore servers. See
-http://doc.arvados.org/install/install-keepproxy.html
-
-Usage: keepproxy [-config path/to/keepproxy.yml]
-
-Options:
-`)
- flag.PrintDefaults()
- fmt.Fprintf(os.Stderr, `
-Example config file:
- %s
-
-Client.APIHost:
-
- Address (or address:port) of the Arvados API endpoint.
-
-Client.AuthToken:
-
- Anonymous API token.
-
-Client.Insecure:
-
- True if your Arvados API endpoint uses an unverifiable SSL/TLS
- certificate.
-
-Listen:
-
- Local port to listen on. Can be "address:port" or ":port", where
- "address" is a host IP address or name and "port" is a port number
- or name.
-
-DisableGet:
-
- Respond 404 to GET and HEAD requests.
-
-DisablePut:
-
- Respond 404 to PUT, POST, and OPTIONS requests.
-
-DefaultReplicas:
-
- Default number of replicas to write if not specified by the
- client. If this is zero or omitted, the site-wide
- defaultCollectionReplication configuration will be used.
-
-Timeout:
-
- Timeout for requests to keep services, with units (e.g., "120s",
- "2m").
-
-PIDFile:
-
- Path to PID file. During startup this file will be created if
- needed, and locked using flock() until keepproxy exits. If it is
- already locked, or any error is encountered while writing to it,
- keepproxy will exit immediately. If omitted or empty, no PID file
- will be used.
-
-Debug:
-
- Enable debug logging.
-
-ManagementToken:
-
- Authorization token to be included in all health check requests.
-
-`, exampleConfigFile)
-}
${uuid_prefix}:
ManagementToken: $management_token
Services:
+ RailsAPI:
+ InternalURLs:
+ "http://localhost:${services[api]}": {}
Workbench1:
ExternalURL: "https://$localip:${services[workbench]}"
Workbench2:
ExternalURL: "https://$localip:${services[workbench2-ssl]}"
SSO:
ExternalURL: "https://$localip:${services[sso]}"
+ Keepproxy:
+ InternalURLs:
+ "http://localhost:${services[keepproxy]}/": {}
+ ExternalURL: "http://$localip:${services[keepproxy-ssl]}/"
Websocket:
ExternalURL: "wss://$localip:${services[websockets-ssl]}/websocket"
+ InternalURLs:
+ "http://localhost:${services[websockets]}": {}
GitSSH:
ExternalURL: "ssh://git@$localip:"
GitHTTP:
- ExternalURL: "http://$localip:${services[arv-git-httpd]}/"
+ InternalURLs:
+ "http://localhost:${services[arv-git-httpd]}/": {}
+ ExternalURL: "https://$localip:${services[arv-git-httpd-ssl]}/"
WebDAV:
+ InternalURLs:
+ "http://localhost:${services[keep-web]}/": {}
ExternalURL: "https://$localip:${services[keep-web-ssl]}/"
+ WebDAVDownload:
+ InternalURLs:
+ "http://localhost:${services[keep-web]}/": {}
+      ExternalURL: "https://$localip:${services[keep-web-ssl]}/"
Composer:
ExternalURL: "http://$localip:${services[composer]}"
Controller:
ExternalURL: "https://$localip:${services[controller-ssl]}"
- NodeProfiles: # to be deprecated in favor of "Services" section
- "*":
- arvados-controller:
- Listen: ":${services[controller]}" # choose a port
- arvados-api-server:
- Listen: ":${services[api]}" # must match Rails server port in your Nginx config
+ InternalURLs:
+ "http://localhost:${services[controller]}": {}
+ RailsAPI:
+ InternalURLs:
+ "http://localhost:${services[api]}/": {}
+ Keepproxy:
+ ExternalURL: "https://$localip:${services[keepproxy-ssl]}"
+ InternalURLs:
+ "http://localhost:${services[keepproxy]}": {}
PostgreSQL:
ConnectionPool: 32 # max concurrent connections per arvados server daemon
Connection:
Collections:
BlobSigningKey: $blob_signing_key
DefaultReplication: 1
+ TrustAllContent: true
Login:
ProviderAppSecret: $sso_app_secret
ProviderAppID: arvados-server
AutoSetupNewUsers: true
AutoSetupNewUsersWithVmUUID: $vm_uuid
AutoSetupNewUsersWithRepository: true
+ AnonymousUserToken: $(cat /var/lib/arvados/superuser_token)
Workbench:
SecretKeyBase: $workbench_secret_key_base
ArvadosDocsite: http://$localip:${services[doc]}/
+ Git:
+ GitCommand: /usr/share/gitolite3/gitolite-shell
+ GitoliteHome: /var/lib/arvados/git
+ Repositories: /var/lib/arvados/git/repositories
EOF
/usr/local/lib/arvbox/yml_override.py /var/lib/arvados/cluster_config.yml
[controller-ssl]=8000
[sso]=8900
[composer]=4200
+ [arv-git-httpd-ssl]=9000
[arv-git-httpd]=9001
[keep-web]=9003
[keep-web-ssl]=9002
[keepproxy]=25100
+ [keepproxy-ssl]=25101
[keepstore0]=25107
[keepstore1]=25108
[ssh]=22
export PATH="$PATH:/var/lib/arvados/git/bin"
cd ~git
-exec /usr/local/bin/arv-git-httpd \
- -address=:${services[arv-git-httpd]} \
- -git-command=/usr/share/gitolite3/gitolite-shell \
- -gitolite-home=/var/lib/arvados/git \
- -repo-root=/var/lib/arvados/git/repositories
+exec /usr/local/bin/arv-git-httpd
exit
fi
-export ARVADOS_API_HOST=$localip:${services[controller-ssl]}
-export ARVADOS_API_HOST_INSECURE=1
-export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
-
-exec /usr/local/bin/keep-web -trust-all-content -listen=:${services[keep-web]}
+exec /usr/local/bin/keep-web
read -rd $'\000' keepservice <<EOF
{
"service_host":"$localip",
- "service_port":${services[keepproxy]},
- "service_ssl_flag":false,
+ "service_port":${services[keepproxy-ssl]},
+ "service_ssl_flag":true,
"service_type":"proxy"
}
EOF
echo $UUID > /var/lib/arvados/keepproxy-uuid
fi
-exec /usr/local/bin/keepproxy -listen=:${services[keepproxy]}
+exec /usr/local/bin/keepproxy
server_name keep-web;
ssl_certificate "${server_cert}";
ssl_certificate_key "${server_cert_key}";
+ client_max_body_size 0;
location / {
proxy_pass http://keep-web;
proxy_set_header Host \$http_host;
}
}
+
+ upstream keepproxy {
+ server localhost:${services[keepproxy]};
+ }
+ server {
+ listen *:${services[keepproxy-ssl]} ssl default_server;
+ server_name keepproxy;
+ ssl_certificate "${server_cert}";
+ ssl_certificate_key "${server_cert_key}";
+ client_max_body_size 128M;
+ location / {
+ proxy_pass http://keepproxy;
+ proxy_set_header Host \$http_host;
+ proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto https;
+ proxy_redirect off;
+ }
+ }
+
+ upstream arvados-git-httpd {
+ server localhost:${services[arv-git-httpd]};
+ }
+ server {
+ listen *:${services[arv-git-httpd-ssl]} ssl default_server;
+ server_name arvados-git-httpd;
+ proxy_connect_timeout 90s;
+ proxy_read_timeout 300s;
+
+ ssl on;
+ ssl_certificate "${server_cert}";
+ ssl_certificate_key "${server_cert_key}";
+ client_max_body_size 50m;
+
+ location / {
+ proxy_pass http://arvados-git-httpd;
+ proxy_set_header Host \$http_host;
+ proxy_set_header X-Forwarded-For \$proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto https;
+ proxy_redirect off;
+ }
+ }
+
}
EOF
description='Summarize resource usage of an Arvados Crunch job')
src = self.add_mutually_exclusive_group()
src.add_argument(
- '--job', '--container', '--container-request',
+ '--job', '--container-request',
type=str, metavar='UUID',
- help='Look up the specified job, container, or container request '
+ help='Look up the specified job or container request '
+ 'and read its log data from Keep (or from the Arvados event log, '
+ 'if the job is still running)')
+ src.add_argument(
+ '--container',
+ type=str, metavar='UUID',
+        help='[Deprecated] Look up the specified container, find its container request, '
'and read its log data from Keep (or from the Arvados event log, '
'if the job is still running)')
src.add_argument(
'--pipeline-instance', type=str, metavar='UUID',
- help='Summarize each component of the given pipeline instance')
+ help='[Deprecated] Summarize each component of the given pipeline instance (historical pre-1.4)')
src.add_argument(
'--log-file', type=str,
help='Read log data from a regular file')
self.summer = summarizer.NewSummarizer(self.args.pipeline_instance, **kwargs)
elif self.args.job:
self.summer = summarizer.NewSummarizer(self.args.job, **kwargs)
+ elif self.args.container:
+ self.summer = summarizer.NewSummarizer(self.args.container, **kwargs)
elif self.args.log_file:
if self.args.log_file.endswith('.gz'):
fh = UTF8Decode(gzip.open(self.args.log_file))
elif self.detected_crunch1:
return JobSummarizer.runtime_constraint_mem_unit
else:
- return ContainerSummarizer.runtime_constraint_mem_unit
+ return ContainerRequestSummarizer.runtime_constraint_mem_unit
def _map_runtime_constraint(self, key):
if hasattr(self, 'map_runtime_constraint'):
if '-dz642-' in uuid:
if process is None:
- process = arv.containers().get(uuid=uuid).execute()
- klass = ContainerTreeSummarizer
+ # Get the associated CR. Doesn't matter which since they all have the same logs
+ crs = arv.container_requests().list(filters=[['container_uuid','=',uuid]],limit=1).execute()['items']
+ if len(crs) > 0:
+ process = crs[0]
+ klass = ContainerRequestTreeSummarizer
elif '-xvhdp-' in uuid:
if process is None:
process = arv.container_requests().get(uuid=uuid).execute()
- klass = ContainerTreeSummarizer
+ klass = ContainerRequestTreeSummarizer
elif '-8i9sb-' in uuid:
if process is None:
process = arv.jobs().get(uuid=uuid).execute()
self.process = process
if label is None:
label = self.process.get('name', self.process['uuid'])
- if self.process.get('log'):
+ # Pre-Arvados v1.4 everything is in 'log'
+ # For 1.4+ containers have no logs and container_requests have them in 'log_uuid', not 'log'
+ log_collection = self.process.get('log')
+ if not log_collection:
+ log_collection = self.process.get('log_uuid')
+ if log_collection:
try:
- rdr = crunchstat_summary.reader.CollectionReader(self.process['log'])
+ rdr = crunchstat_summary.reader.CollectionReader(log_collection)
except arvados.errors.NotFoundError as e:
logger.warning("Trying event logs after failing to read "
"log collection %s: %s", self.process['log'], e)
}
-class ContainerSummarizer(ProcessSummarizer):
+class ContainerRequestSummarizer(ProcessSummarizer):
runtime_constraint_mem_unit = 1
**kwargs)
-class ContainerTreeSummarizer(MultiSummarizer):
+class ContainerRequestTreeSummarizer(MultiSummarizer):
def __init__(self, root, skip_child_jobs=False, **kwargs):
arv = arvados.api('v1', model=OrderedJsonModel())
current = todo.popleft()
label = current['name']
sort_key = current['created_at']
- if current['uuid'].find('-xvhdp-') > 0:
- current = arv.containers().get(uuid=current['container_uuid']).execute()
- summer = ContainerSummarizer(current, label=label, **kwargs)
+ summer = ContainerRequestSummarizer(current, label=label, **kwargs)
summer.sort_key = sort_key
children[current['uuid']] = summer
child_crs = arv.container_requests().index(
order=['uuid asc'],
filters=page_filters+[
- ['requesting_container_uuid', '=', current['uuid']]],
+ ['requesting_container_uuid', '=', current['container_uuid']]],
).execute()
if not child_crs['items']:
break
sorted_children = collections.OrderedDict()
for uuid in sorted(list(children.keys()), key=lambda uuid: children[uuid].sort_key):
sorted_children[uuid] = children[uuid]
- super(ContainerTreeSummarizer, self).__init__(
+ super(ContainerRequestTreeSummarizer, self).__init__(
children=sorted_children,
label=root['name'],
**kwargs)
--- /dev/null
+category metric task_max task_max_rate job_total
+blkio:0:0 read 0 0 0
+blkio:0:0 write 0 0 0
+cpu cpus 20 - -
+cpu sys 0.39 0.04 0.39
+cpu user 2.06 0.20 2.06
+cpu user+sys 2.45 0.24 2.45
+fuseops read 0 0 0
+fuseops write 0 0 0
+keepcache hit 0 0 0
+keepcache miss 0 0 0
+keepcalls get 0 0 0
+keepcalls put 0 0 0
+mem cache 172032 - -
+mem pgmajfault 0 - 0
+mem rss 69525504 - -
+mem swap 0 - -
+net:eth0 rx 859480 1478.97 859480
+net:eth0 tx 55888 395.71 55888
+net:eth0 tx+rx 915368 1874.69 915368
+net:keep0 rx 0 0 0
+net:keep0 tx 0 0 0
+net:keep0 tx+rx 0 0 0
+statfs available 397744787456 - 397744787456
+statfs total 402611240960 - 402611240960
+statfs used 4870303744 52426.18 4866453504
+time elapsed 20 - 20
+# Number of tasks: 1
+# Max CPU time spent by a single task: 2.45s
+# Max CPU usage in a single interval: 23.70%
+# Overall CPU usage: 12.25%
+# Max memory used by a single task: 0.07GB
+# Max network traffic in a single task: 0.00GB
+# Max network speed in a single interval: 0.00MB/s
+# Keep cache miss rate 0.00%
+# Keep cache utilization 0.00%
+# Temp disk utilization 1.21%
+#!! container max RSS was 67 MiB -- try reducing runtime_constraints to "ram":1020054732
+#!! container max temp disk utilization was 1% of 383960 MiB -- consider reducing "tmpdirMin" and/or "outdirMin"
s.run()
-class SummarizeContainer(ReportDiff):
+class SummarizeContainerCommon(ReportDiff):
fake_container = {
'uuid': '9tee4-dz642-lymtndkpy39eibk',
'created_at': '2017-08-18T14:27:25.371388141',
'log': '9tee4-4zz18-ihyzym9tcwjwg4r',
}
fake_request = {
- 'uuid': '9tee4-xvhdp-uper95jktm10d3w',
+ 'uuid': '9tee4-xvhdp-kk0ja1cl8b2kr1y',
'name': 'container',
'created_at': '2017-08-18T14:27:25.242339223Z',
'container_uuid': fake_container['uuid'],
- }
- reportfile = os.path.join(
- TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk.txt.gz')
+ 'runtime_constraints': {
+ 'vcpus': 1,
+ 'ram': 2621440000
+ },
+ 'log_uuid' : '9tee4-4zz18-m2swj50nk0r8b6y'
+ }
+
logfile = os.path.join(
- TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-crunchstat.txt.gz')
+ TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-crunchstat.txt.gz')
arvmountlog = os.path.join(
- TESTS_DIR, 'container_9tee4-dz642-lymtndkpy39eibk-arv-mount.txt.gz')
+ TESTS_DIR, 'container_request_9tee4-xvhdp-kk0ja1cl8b2kr1y-arv-mount.txt.gz')
@mock.patch('arvados.collection.CollectionReader')
@mock.patch('arvados.api')
- def test_container(self, mock_api, mock_cr):
- mock_api().container_requests().index().execute.return_value = {'items':[]}
+ def check_common(self, mock_api, mock_cr):
+ items = [ {'items':[self.fake_request]}] + [{'items':[]}] * 100
+ # Index and list mean the same thing, but are used in different places in the
+ # code. It's fragile, but exploit that fact to distinguish the two uses.
+ mock_api().container_requests().index().execute.return_value = {'items': [] } # child_crs
+ mock_api().container_requests().list().execute.side_effect = items # parent request
mock_api().container_requests().get().execute.return_value = self.fake_request
mock_api().containers().get().execute.return_value = self.fake_container
mock_cr().__iter__.return_value = [
return UTF8Decode(gzip.open(self.arvmountlog))
mock_cr().open.side_effect = _open
args = crunchstat_summary.command.ArgumentParser().parse_args(
- ['--container', self.fake_request['uuid']])
+ self.arg_strings)
cmd = crunchstat_summary.command.Command(args)
cmd.run()
self.diff_known_report(self.reportfile, cmd)
+
+class SummarizeContainer(SummarizeContainerCommon):
+ uuid = '9tee4-dz642-lymtndkpy39eibk'
+ reportfile = os.path.join(TESTS_DIR, 'container_%s.txt.gz' % uuid)
+ arg_strings = ['--container', uuid, '-v', '-v']
+
+ def test_container(self):
+ self.check_common()
+
+
+class SummarizeContainerRequest(SummarizeContainerCommon):
+ uuid = '9tee4-xvhdp-kk0ja1cl8b2kr1y'
+ reportfile = os.path.join(TESTS_DIR, 'container_request_%s.txt.gz' % uuid)
+ arg_strings = ['--container-request', uuid, '-v', '-v']
+
+ def test_container_request(self):
+ self.check_common()
+
+
class SummarizeJob(ReportDiff):
fake_job_uuid = '4xphq-8i9sb-jq0ekny1xou3zoh'
fake_log_id = 'fake-log-collection-id'
"ignore": "test",
"package": [
{
- "checksumSHA1": "j4je0EzPGzjb6INLY1BHZ+hyMjc=",
+ "checksumSHA1": "jfYWZyRWLMfG0J5K7G2K8a9AKfs=",
"origin": "github.com/curoverse/goamz/aws",
"path": "github.com/AdRoll/goamz/aws",
- "revision": "888b4804f2653cd35ebcc95f046079e63b5b2799",
- "revisionTime": "2017-07-27T13:52:37Z"
+ "revision": "1bba09f407ef1d02c90bc37eff7e91e2231fa587",
+ "revisionTime": "2019-09-05T14:15:25Z"
},
{
- "checksumSHA1": "0+n3cT6e7sQCCbBAH8zg6neiHTk=",
+ "checksumSHA1": "lqoARtBgwnvhEhLyIjR3GLnR5/c=",
"origin": "github.com/curoverse/goamz/s3",
"path": "github.com/AdRoll/goamz/s3",
- "revision": "888b4804f2653cd35ebcc95f046079e63b5b2799",
- "revisionTime": "2017-07-27T13:52:37Z"
+ "revision": "1bba09f407ef1d02c90bc37eff7e91e2231fa587",
+ "revisionTime": "2019-09-05T14:15:25Z"
},
{
"checksumSHA1": "tvxbsTkdjB0C/uxEglqD6JfVnMg=",
"origin": "github.com/curoverse/goamz/s3/s3test",
"path": "github.com/AdRoll/goamz/s3/s3test",
- "revision": "888b4804f2653cd35ebcc95f046079e63b5b2799",
- "revisionTime": "2017-07-27T13:52:37Z"
+ "revision": "1bba09f407ef1d02c90bc37eff7e91e2231fa587",
+ "revisionTime": "2019-09-05T14:15:25Z"
},
{
"checksumSHA1": "KF4DsRUpZ+h+qRQ/umRAQZfVvw0=",