Closes #6221, #6673.
therubyracer
uglifier (>= 1.0.3)
wiselinks
-
-BUNDLED WITH
- 1.10.5
@@notification_tests = []
@@notification_tests.push lambda { |controller, current_user|
+ return nil if Rails.configuration.shell_in_a_box_url
AuthorizedKey.limit(1).where(authorized_user_uuid: current_user.uuid).each do
return nil
end
else
s = ""
if days > 0
- s += "#{days}<span class='time-label-divider'>d</span> "
+ s += "#{days}<span class='time-label-divider'>d</span>"
end
if (hours > 0)
s += "#{minutes}<span class='time-label-divider'>m</span>"
- if not round_to_min
+ if not round_to_min or (days == 0 and hours == 0 and minutes == 0)
s += "#{seconds}<span class='time-label-divider'>s</span>"
end
end
<div class="col-md-3">
<% if current_job[:started_at] %>
<% walltime = ((if current_job[:finished_at] then current_job[:finished_at] else Time.now() end) - current_job[:started_at]) %>
- <% cputime = tasks.map { |task|
- if task.started_at and task.job_uuid == current_job[:uuid]
- finished_at = task.finished_at || current_job[:finished_at] || Time.now()
- finished_at - task.started_at
- else
- 0
- end
- }.reduce(:+) || 0 %>
- <%= render_runtime(walltime, false, false) %>
- <% if cputime > 0 %> / <%= render_runtime(cputime, false, false) %> (<%= (cputime/walltime).round(1) %>⨯)<% end %>
+ <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1) *
+ ((current_job[:finished_at] || Time.now()) - current_job[:started_at]) %>
+ <%= render_runtime(walltime, false) %>
+ <% if cputime > 0 %> / <%= render_runtime(cputime, false) %> (<%= (cputime/walltime).round(1) %>⨯)<% end %>
<% end %>
</div>
<% end %>
<%# column offset 5 %>
<div class="col-md-6">
<% queuetime = Time.now - Time.parse(current_job[:created_at].to_s) %>
- Queued for <%= render_runtime(queuetime, true) %>.
+ Queued for <%= render_runtime(queuetime, false) %>.
<% begin %>
<% if current_job[:queue_position] == 0 %>
This job is next in the queue to run.
<% pipeline_jobs = render_pipeline_jobs %>
<% job_uuids = pipeline_jobs.map { |j| j[:job].andand[:uuid] }.compact %>
-<% job_uuids_finished = {}; pipeline_jobs.map { |j| job_uuids_finished[j[:job].andand[:uuid]] = j[:job].andand[:finished_at] } %>
<% if @object.state == 'Paused' %>
<p>
</p>
<% end %>
-<% tasks = JobTask.filter([['job_uuid', 'in', job_uuids]]).results %>
<% runningtime = determine_wallclock_runtime(pipeline_jobs.map {|j| j[:job]}.compact) %>
<p>
end %>
<%= if walltime > runningtime
- render_runtime(walltime, true, false)
+ render_runtime(walltime, false)
else
- render_runtime(runningtime, true, false)
+ render_runtime(runningtime, false)
end %><% if @object.finished_at %> at <%= render_localized_date(@object.finished_at) %><% end %>.
<% else %>
This pipeline is <%= if @object.state.start_with? 'Running' then 'active' else @object.state.downcase end %>.
ran
<% end %>
for
- <% cputime = tasks.map { |task|
- if task.started_at
- finished_at = task.finished_at || job_uuids_finished[task.job_uuid] || Time.now()
- finished_at - task.started_at
+ <%
+ cputime = pipeline_jobs.map { |j|
+ if j[:job][:started_at]
+ (j[:job][:runtime_constraints].andand[:min_nodes] || 1) * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
else
0
end
}.reduce(:+) || 0 %>
- <%= render_runtime(runningtime, true, false) %><% if (walltime - runningtime) > 0 %>
- (<%= render_runtime(walltime - runningtime, true, false) %> queued)<% end %><% if cputime == 0 %>.<% else %>
+ <%= render_runtime(runningtime, false) %><% if (walltime - runningtime) > 0 %>
+ (<%= render_runtime(walltime - runningtime, false) %> queued)<% end %><% if cputime == 0 %>.<% else %>
and used
- <%= render_runtime(cputime, true, false) %>
- of CPU time (<%= (cputime/runningtime).round(1) %>⨯ scaling).
+ <%= render_runtime(cputime, false) %>
+ of node allocation time (<%= (cputime/runningtime).round(1) %>⨯ scaling).
<% end %>
</p>
%>
<% pipeline_jobs.each_with_index do |pj, i| %>
- <%= render partial: 'running_component', locals: {tasks: tasks, pj: pj, i: i, expanded: false} %>
+ <%= render partial: 'running_component', locals: {pj: pj, i: i, expanded: false} %>
<% end %>
<%= writable ? 'writable' : 'read-only' %>
</td>
<td style="word-break:break-all;">
+ <code><%= repo.http_fetch_url %></code><br/>
<code><%= writable ? repo.push_url : repo.fetch_url %></code>
</td>
<td>
<div id="manage_virtual_machines" class="panel-body">
<p>
- For more information see <%= link_to raw('Arvados Docs → User Guide → SSH access'),
- "#{Rails.configuration.arvados_docsite}/user/getting_started/ssh-access-unix.html",
+ For more information see <%= link_to raw('Arvados Docs → User Guide → VM access'),
+ "#{Rails.configuration.arvados_docsite}/user/getting_started/vm-login-with-webshell.html",
target: "_blank"%>.
- <% if @my_virtual_machines.any? or true %>
- A sample <code>~/.ssh/config</code> entry is provided below.
- <% end %>
</p>
<% if !@my_virtual_machines.any? %>
</tbody>
</table>
- <p><i>~/.ssh/config:</i></p>
+ <p>Sample SSH configuration for <i>~/.ssh/config</i>:</p>
<pre>Host *.arvados
TCPKeepAlive yes
ServerAliveInterval 60
setTimeout(function() {
sh.keysPressed("<%= j Thread.current[:arvados_api_token] %>\n");
sh.vt100('(sent authentication token)\n');
- }, 1000);
- }, 1000);
+ }, 2000);
+ }, 2000);
}
// -->
</script>
ks.each do |kk|
cfg = cfg.send(kk)
end
- if cfg.respond_to?(k.to_sym) and !cfg.send(k).nil?
- # Config must have been set already in environments/*.rb.
+ if v.nil? and cfg.respond_to?(k) and !cfg.send(k).nil?
+ # Config is nil in *.yml, but has been set already in
+ # environments/*.rb (or has a Rails default). Don't overwrite
+ # the default/upstream config with nil.
#
# After config files have been migrated, this mechanism should
- # be deprecated, then removed.
+ # be removed.
+ Rails.logger.warn <<EOS
+DEPRECATED: Inheriting config.#{ks.join '.'} from Rails config.
+ Please move this config into config/application.yml.
+EOS
elsif v.nil?
# Config variables are not allowed to be nil. Make a "naughty"
# list, and present it below.
assert_equal([['.', 'foo', 3]], assigns(:object).files)
end
end
+
+ test 'Edit name and verify that a duplicate is not created' do
+ @controller = ProjectsController.new
+ project = api_fixture("groups")["aproject"]
+ post :update, {
+ id: project["uuid"],
+ project: {
+ name: 'test name'
+ },
+ format: :json
+ }, session_for(:active)
+ assert_includes @response.body, 'test name'
+ updated = assigns(:object)
+ assert_equal updated.uuid, project["uuid"]
+ assert_equal 'test name', updated.name
+ end
end
assert_equal files.sort, disabled.sort, "Expected to see all collection files in disabled list of files"
end
+
+ test "anonymous user accesses collection in shared project" do
+ Rails.configuration.anonymous_user_token =
+ api_fixture('api_client_authorizations')['anonymous']['api_token']
+ collection = api_fixture('collections')['public_text_file']
+ get(:show, {id: collection['uuid']})
+
+ response_object = assigns(:object)
+ assert_equal collection['name'], response_object['name']
+ assert_equal collection['uuid'], response_object['uuid']
+ assert_includes @response.body, 'Hello world'
+ assert_includes @response.body, 'Content address'
+ refute_nil css_select('[href="#Advanced"]')
+ end
+
+ test "can view empty collection" do
+ get :show, {id: 'd41d8cd98f00b204e9800998ecf8427e+0'}, session_for(:active)
+ assert_includes @response.body, 'The following collections have this content'
+ end
+
+ test "collection portable data hash redirect" do
+ di = api_fixture('collections')['docker_image']
+ get :show, {id: di['portable_data_hash']}, session_for(:active)
+ assert_match /\/collections\/#{di['uuid']}/, @response.redirect_url
+ end
+
+ test "collection portable data hash with multiple matches" do
+ pdh = api_fixture('collections')['foo_file']['portable_data_hash']
+ get :show, {id: pdh}, session_for(:admin)
+ matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
+ assert matches.size > 1
+
+ matches.each do |k,v|
+ assert_match /href="\/collections\/#{v['uuid']}">.*#{v['name']}<\/a>/, @response.body
+ end
+
+ assert_includes @response.body, 'The following collections have this content:'
+ assert_not_includes @response.body, 'more results are not shown'
+ assert_not_includes @response.body, 'Activity'
+ assert_not_includes @response.body, 'Sharing and permissions'
+ end
+
+ test "collection page renders name" do
+ collection = api_fixture('collections')['foo_file']
+ get :show, {id: collection['uuid']}, session_for(:active)
+ assert_includes @response.body, collection['name']
+ assert_match /href="#{collection['uuid']}\/foo" ><\/i> foo</, @response.body
+ end
+
+ test "No Upload tab on non-writable collection" do
+ get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
+ assert_not_includes @response.body, '<a href="#Upload"'
+ end
end
require 'test_helper'
class JobsControllerTest < ActionController::TestCase
+ test "visit jobs index page" do
+ get :index, {}, session_for(:active)
+ assert_response :success
+ end
end
assert_match /\/users\/welcome/, @response.redirect_url
assert_empty css_select('[href="/projects/public"]')
end
+
+ test "find a project and edit its description" do
+ project = api_fixture('groups')['aproject']
+ use_token :active
+ found = Group.find(project['uuid'])
+ found.description = 'test description update'
+ found.save!
+ get(:show, {id: project['uuid']}, session_for(:active))
+ assert_includes @response.body, 'test description update'
+ end
+
+ test "find a project and edit description to textile description" do
+ project = api_fixture('groups')['aproject']
+ use_token :active
+ found = Group.find(project['uuid'])
+ found.description = '*test bold description for textile formatting*'
+ found.save!
+ get(:show, {id: project['uuid']}, session_for(:active))
+ assert_includes @response.body, '<strong>test bold description for textile formatting</strong>'
+ end
+
+ test "find a project and edit description to html description" do
+ project = api_fixture('groups')['aproject']
+ use_token :active
+ found = Group.find(project['uuid'])
+ found.description = 'Textile description with link to home page <a href="/">take me home</a>.'
+ found.save!
+ get(:show, {id: project['uuid']}, session_for(:active))
+ assert_includes @response.body, 'Textile description with link to home page <a href="/">take me home</a>.'
+ end
+
+ test "find a project and edit description to textile description with link to object" do
+ project = api_fixture('groups')['aproject']
+ use_token :active
+ found = Group.find(project['uuid'])
+
+ # uses 'Link to object' as a hyperlink for the object
+ found.description = '"Link to object":' + api_fixture('groups')['asubproject']['uuid']
+ found.save!
+ get(:show, {id: project['uuid']}, session_for(:active))
+
+ # check that input was converted to textile, not staying as inputted
+ refute_includes @response.body,'"Link to object"'
+ refute_empty css_select('[href="/groups/zzzzz-j7d0g-axqo7eu9pwvna1x"]')
+ end
+
+ test "project viewer can't see project sharing tab" do
+ project = api_fixture('groups')['aproject']
+ get(:show, {id: project['uuid']}, session_for(:project_viewer))
+ refute_includes @response.body, '<div id="Sharing"'
+ assert_includes @response.body, '<div id="Data_collections"'
+ end
+
+ [
+ 'admin',
+ 'active',
+ ].each do |username|
+ test "#{username} can see project sharing tab" do
+ project = api_fixture('groups')['aproject']
+ get(:show, {id: project['uuid']}, session_for(username))
+ assert_includes @response.body, '<div id="Sharing"'
+ assert_includes @response.body, '<div id="Data_collections"'
+ end
+ end
+
+ [
+ ['admin',true],
+ ['active',true],
+ ['project_viewer',false],
+ ].each do |user, can_move|
+ test "#{user} can move subproject from project #{can_move}" do
+ get(:show, {id: api_fixture('groups')['aproject']['uuid']}, session_for(user))
+ if can_move
+ assert_includes @response.body, 'Move project...'
+ else
+ refute_includes @response.body, 'Move project...'
+ end
+ end
+ end
+
+ [
+ ["jobs", "/jobs"],
+ ["pipelines", "/pipeline_instances"],
+ ["collections", "/collections"],
+ ].each do |target,path|
+ test "test dashboard button all #{target}" do
+ get :index, {}, session_for(:active)
+ assert_includes @response.body, "href=\"#{path}\""
+ assert_includes @response.body, "All #{target}"
+ end
+ end
end
require 'test_helper'
class UsersControllerTest < ActionController::TestCase
+
test "valid token works in controller test" do
get :index, {}, session_for(:active)
assert_response :success
end
assert_equal 1, found_email, "Expected 1 email after requesting shell access"
end
+
+ [
+ 'admin',
+ 'active',
+ ].each do |username|
+ test "access users page as #{username} and verify show button is available" do
+ admin_user = api_fixture('users','admin')
+ active_user = api_fixture('users','active')
+ get :index, {}, session_for(username)
+ if username == 'admin'
+ assert_match /<a href="\/projects\/#{admin_user['uuid']}">Home<\/a>/, @response.body
+ assert_match /<a href="\/projects\/#{active_user['uuid']}">Home<\/a>/, @response.body
+ assert_match /href="\/users\/#{admin_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+ assert_match /href="\/users\/#{active_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+ assert_includes @response.body, admin_user['email']
+ assert_includes @response.body, active_user['email']
+ else
+ refute_match /Home<\/a>/, @response.body
+ refute_match /href="\/users\/#{admin_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+ assert_match /href="\/users\/#{active_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+ assert_includes @response.body, active_user['email']
+ end
+ end
+ end
+
+ [
+ 'admin',
+ 'active',
+ ].each do |username|
+ test "access settings drop down menu as #{username}" do
+ admin_user = api_fixture('users','admin')
+ active_user = api_fixture('users','active')
+ get :show, {
+ id: api_fixture('users')[username]['uuid']
+ }, session_for(username)
+ if username == 'admin'
+ assert_includes @response.body, admin_user['email']
+ refute_empty css_select('[id="system-menu"]')
+ else
+ assert_includes @response.body, active_user['email']
+ assert_empty css_select('[id="system-menu"]')
+ end
+ end
+ end
end
["filename.yml", true],
["filename.bam", false],
+ ["filename.tar", false],
["filename", false],
].each do |file_name, preview_allowed|
test "verify '#{file_name}' is allowed for preview #{preview_allowed}" do
end
end
end
-
- test "anonymous user accesses collection in shared project" do
- visit "/collections/#{api_fixture('collections')['public_text_file']['uuid']}"
-
- # in collection page
- assert_text 'Public Projects Unrestricted public data'
- assert_text 'Hello world'
- assert_text 'Content address'
- assert_selector 'a', text: 'Provenance graph'
- end
end
assert page.has_link?('Report a problem ...'), 'No link - Report a problem'
end
end
+
+ test "no SSH public key notification when shell_in_a_box_url is configured" do
+ Rails.configuration.shell_in_a_box_url = 'example.com'
+ visit page_with_token('job_reader')
+ click_link 'notifications-menu'
+ assert_no_selector 'a', text:'Click here to set up an SSH public key for use with Arvados.'
+ assert_selector 'a', text:'Click here to learn how to run an Arvados Crunch pipeline'
+ end
+
+ [
+ ['Repositories','repository','Attributes'],
+ ['Virtual machines','virtual machine','current_user_logins'],
+ ['SSH keys','authorized key','public_key'],
+ ['Links','link','link_class'],
+ ['Groups','group','group_class'],
+ ['Compute nodes','node','info[ping_secret'],
+ ['Keep services','keep service','service_ssl_flag'],
+ ['Keep disks', 'keep disk','bytes_free'],
+ ].each do |page_name, add_button_text, look_for|
+ test "test system menu #{page_name} link" do
+ skip 'Skip repositories test until #6652 is fixed.' if page_name == 'Repositories'
+
+ visit page_with_token('admin')
+ within('.navbar-fixed-top') do
+ page.find("#system-menu").click
+ within('.dropdown-menu') do
+ assert_selector 'a', text: page_name
+ find('a', text: page_name).click
+ end
+ end
+
+ # click the add button
+ assert_selector 'button', text: "Add a new #{add_button_text}"
+ find('button', text: "Add a new #{add_button_text}").click
+
+ # look for unique property in the created object page
+ assert page.has_text? look_for
+ end
+ end
end
assert_selector 'div#Upload.active div.panel'
end
- test "No Upload tab on non-writable collection" do
- need_javascript
- visit(page_with_token 'active',
- '/collections/'+api_fixture('collections')['user_agreement']['uuid'])
- assert_no_selector '.nav-tabs Upload'
- end
-
test "Upload two empty files with the same name" do
need_selenium "to make file uploads work"
visit page_with_token 'active', sandbox_path
assert_text "Copy of #{collection_name}"
end
- test "Collection page renders name" do
- Capybara.current_driver = :rack_test
- uuid = api_fixture('collections')['foo_file']['uuid']
- coll_name = api_fixture('collections')['foo_file']['name']
- visit page_with_token('active', "/collections/#{uuid}")
- assert(page.has_text?(coll_name), "Collection page did not include name")
- # Now check that the page is otherwise normal, and the collection name
- # isn't only showing up in an error message.
- assert(page.has_link?('foo'), "Collection page did not include file link")
- end
-
def check_sharing(want_state, link_regexp)
# We specifically want to click buttons. See #4291.
if want_state == :off
end
end
- test "can view empty collection" do
- Capybara.current_driver = :rack_test
- uuid = 'd41d8cd98f00b204e9800998ecf8427e+0'
- visit page_with_token('active', "/collections/#{uuid}")
- assert page.has_text?(/This collection is empty|The following collections have this content/)
- end
-
test "combine selected collections into new collection" do
foo_collection = api_fixture('collections')['foo_file']
bar_collection = api_fixture('collections')['bar_file']
assert(page.has_text?('file2_in_subdir4.txt'), 'file not found - file1_in_subdir4.txt')
end
- test "Collection portable data hash redirect" do
- di = api_fixture('collections')['docker_image']
- visit page_with_token('active', "/collections/#{di['portable_data_hash']}")
-
- # check redirection
- assert current_path.end_with?("/collections/#{di['uuid']}")
- assert page.has_text?("docker_image")
- assert page.has_text?("Activity")
- assert page.has_text?("Sharing and permissions")
- end
-
- test "Collection portable data hash with multiple matches" do
- pdh = api_fixture('collections')['foo_file']['portable_data_hash']
- visit page_with_token('admin', "/collections/#{pdh}")
-
- matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
- assert matches.size > 1
-
- matches.each do |k,v|
- assert page.has_link?(v["name"]), "Page /collections/#{pdh} should contain link '#{v['name']}'"
- end
- assert_text 'The following collections have this content:'
- assert_no_text 'more results are not shown'
- assert_no_text 'Activity'
- assert_no_text 'Sharing and permissions'
- end
-
test "Collection portable data hash with multiple matches with more than one page of results" do
pdh = api_fixture('collections')['baz_file']['portable_data_hash']
visit page_with_token('admin', "/collections/#{pdh}")
page_text = page.text
if run_time
- match = /This pipeline started at (.*)\. It failed after (.*) seconds at (.*)\. Check the Log/.match page_text
+ match = /This pipeline started at (.*)\. It failed after (.*) at (.*)\. Check the Log/.match page_text
else
match = /This pipeline started at (.*). It has been active for(.*)/.match page_text
end
"Description update did not survive page refresh")
end
- test 'Find a project and edit description to textile description' do
- visit page_with_token 'active', '/'
- find("#projects-menu").click
- find(".dropdown-menu a", text: "A Project").click
- within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
- find('span', text: api_fixture('groups')['aproject']['name']).click
- within('.arv-description-as-subtitle') do
- find('.fa-pencil').click
- find('.editable-input textarea').set('<p>*Textile description for A project* - "take me home":/ </p><p>And a new paragraph in description.</p>')
- find('.editable-submit').click
- end
- wait_for_ajax
- end
-
- # visit project page
- visit current_path
- assert_no_text '*Textile description for A project*'
- assert(find?('.container-fluid', text: 'Textile description for A project'),
- "Description update did not survive page refresh")
- assert(find?('.container-fluid', text: 'And a new paragraph in description'),
- "Description did not contain the expected new paragraph")
- assert(page.has_link?("take me home"), "link not found in description")
-
- click_link 'take me home'
-
- # now in dashboard
- assert(page.has_text?('Active pipelines'), 'Active pipelines - not found on dashboard')
- end
-
- test 'Find a project and edit description to html description' do
- visit page_with_token 'active', '/'
- find("#projects-menu").click
- find(".dropdown-menu a", text: "A Project").click
- within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
- find('span', text: api_fixture('groups')['aproject']['name']).click
- within('.arv-description-as-subtitle') do
- find('.fa-pencil').click
- find('.editable-input textarea').set('<br>Textile description for A project</br> - <a href="/">take me home</a>')
- find('.editable-submit').click
- end
- wait_for_ajax
- end
- visit current_path
- assert(find?('.container-fluid', text: 'Textile description for A project'),
- "Description update did not survive page refresh")
- assert(!find?('.container-fluid', text: '<br>Textile description for A project</br>'),
- "Textile description is displayed with uninterpreted formatting characters")
- assert(page.has_link?("take me home"),"link not found in description")
- click_link 'take me home'
- assert page.has_text?('Active pipelines')
- end
-
- test 'Find a project and edit description to textile description with link to object' do
- visit page_with_token 'active', '/'
- find("#projects-menu").click
- find(".dropdown-menu a", text: "A Project").click
- within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
- find('span', text: api_fixture('groups')['aproject']['name']).click
- within('.arv-description-as-subtitle') do
- find('.fa-pencil').click
- find('.editable-input textarea').set('*Textile description for A project* - "go to sub-project":' + api_fixture('groups')['asubproject']['uuid'] + "'")
- find('.editable-submit').click
- end
- wait_for_ajax
- end
- visit current_path
- assert(find?('.container-fluid', text: 'Textile description for A project'),
- "Description update did not survive page refresh")
- assert(!find?('.container-fluid', text: '*Textile description for A project*'),
- "Textile description is displayed with uninterpreted formatting characters")
- assert(page.has_link?("go to sub-project"), "link not found in description")
- click_link 'go to sub-project'
- assert(page.has_text?(api_fixture('groups')['asubproject']['name']), 'sub-project name not found after clicking link')
- end
-
- test 'Add a new name, then edit it, without creating a duplicate' do
- project_uuid = api_fixture('groups')['aproject']['uuid']
- specimen_uuid = api_fixture('traits')['owned_by_aproject_with_no_name']['uuid']
- visit page_with_token 'active', '/projects/' + project_uuid
- click_link 'Other objects'
- within '.selection-action-container' do
- # Wait for the tab to load:
- assert_selector 'tr[data-kind="arvados#trait"]'
- within first('tr', text: 'Trait') do
- find(".fa-pencil").click
- find('.editable-input input').set('Now I have a name.')
- find('.glyphicon-ok').click
- assert_selector '.editable', text: 'Now I have a name.'
- find(".fa-pencil").click
- find('.editable-input input').set('Now I have a new name.')
- find('.glyphicon-ok').click
- end
- wait_for_ajax
- assert_selector '.editable', text: 'Now I have a new name.'
- end
- visit current_path
- click_link 'Other objects'
- within '.selection-action-container' do
- find '.editable', text: 'Now I have a new name.'
- assert_no_selector '.editable', text: 'Now I have a name.'
- end
- end
-
test 'Create a project and move it into a different project' do
visit page_with_token 'active', '/projects'
find("#projects-menu").click
text: group_name("anonymous_group"))
end
- test "project viewer can't see project sharing tab" do
- show_object_using('project_viewer', 'groups', 'aproject', 'A Project')
- assert(page.has_no_link?("Sharing"),
- "read-only project user sees sharing tab")
- end
-
test "project owner can manage sharing for another user" do
add_user = api_fixture('users')['future_project_user']
new_name = ["first_name", "last_name"].map { |k| add_user[k] }.join(" ")
end
end
- [
- ["jobs", "/jobs"],
- ["pipelines", "/pipeline_instances"],
- ["collections", "/collections"]
- ].each do |target,path|
- test "Test dashboard button all #{target}" do
- visit page_with_token 'active', '/'
- click_link "All #{target}"
- assert_equal path, current_path
- end
- end
-
def scroll_setup(project_name,
total_nbr_items,
item_list_parameter,
end
end
- # Move button accessibility
- [
- ['admin', true],
- ['active', true], # project owner
- ['project_viewer', false],
- ].each do |user, can_move|
- test "#{user} can move subproject under another user's Home #{can_move}" do
- project = api_fixture('groups')['aproject']
- collection = api_fixture('collections')['collection_to_move_around_in_aproject']
-
- # verify the project move button
- visit page_with_token user, "/projects/#{project['uuid']}"
- if can_move
- assert page.has_link? 'Move project...'
- else
- assert page.has_no_link? 'Move project...'
- end
- end
- end
-
test "error while loading tab" do
original_arvados_v1_base = Rails.configuration.arvados_v1_base
page.find_field('public_key').set 'first test with an incorrect ssh key value'
click_button 'Submit'
- assert page.has_text?('Public key does not appear to be a valid ssh-rsa or dsa public key'), 'No text - Public key does not appear to be a valid'
+ assert_text 'Public key does not appear to be a valid ssh-rsa or dsa public key'
public_key_str = api_fixture('authorized_keys')['active']['public_key']
page.find_field('public_key').set public_key_str
page.find_field('name').set 'added_in_test'
click_button 'Submit'
- assert page.has_text?('Public key already exists in the database, use a different key.'), 'No text - Public key already exists'
+ assert_text 'Public key already exists in the database, use a different key.'
new_key = SSHKey.generate
page.find_field('public_key').set new_key.ssh_public_key
end
# key must be added. look for it in the refreshed page
- assert page.has_text?('added_in_test'), 'No text - added_in_test'
+ assert_text 'added_in_test'
end
[
click_on "Create"
end
assert_text ":active/workbenchtest.git"
+ assert_match /git@git.*:active\/workbenchtest.git/, page.text
+ assert_match /https:\/\/git.*\/active\/workbenchtest.git/, page.text
end
end
click_link 'Metadata'
assert page.has_text? 'VirtualMachine: testvm.shell'
end
-
- [
- 'admin',
- 'active',
- ].each do |username|
- test "login as #{username} and access show button" do
- need_javascript
-
- user = api_fixture('users', username)
-
- visit page_with_token(username, '/users')
-
- within('tr', text: user['uuid']) do
- assert_text user['email']
- if username == 'admin'
- assert_selector 'a', text: 'Home'
- else
- assert_no_selector 'a', text: 'Home'
- end
- assert_selector 'a', text: 'Show'
- find('a', text: 'Show').click
- end
- assert_selector 'a', text: 'Attributes'
- end
- end
-
- test "admin user can access another user page" do
- need_javascript
-
- visit page_with_token('admin', '/users')
-
- active_user = api_fixture('users', 'active')
- within('tr', text: active_user['uuid']) do
- assert_text active_user['email']
- assert_selector "a[href=\"/projects/#{active_user['uuid']}\"]", text: 'Home'
- assert_selector 'a', text: 'Show'
- find('a', text: 'Show').click
- end
- assert_selector 'a', text:'Attributes'
- end
end
- user/getting_started/workbench.html.textile.liquid
- user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
- Access an Arvados virtual machine:
+ - user/getting_started/vm-login-with-webshell.html.textile.liquid
- user/getting_started/ssh-access-unix.html.textile.liquid
- user/getting_started/ssh-access-windows.html.textile.liquid
- user/getting_started/check-environment.html.textile.liquid
- user/tutorials/tutorial-keep-get.html.textile.liquid
- user/tutorials/tutorial-keep-mount.html.textile.liquid
- user/topics/keep.html.textile.liquid
+ - user/topics/arv-copy.html.textile.liquid
- Run a pipeline on the command line:
- user/topics/running-pipeline-command-line.html.textile.liquid
- user/topics/arv-run.html.textile.liquid
installguide:
- Overview:
- install/index.html.textile.liquid
- - Docker:
+ - Docker-based installation:
- install/pre-built-docker.html.textile.liquid
- install/install-docker.html.textile.liquid
- Manual installation:
- install/install-manual-prerequisites.html.textile.liquid
+ - install/install-sso.html.textile.liquid
- install/install-api-server.html.textile.liquid
- - install/install-workbench-app.html.textile.liquid
- - install/install-shell-server.html.textile.liquid
- - install/create-standard-objects.html.textile.liquid
+ - install/install-arv-git-httpd.html.textile.liquid
- install/install-keepstore.html.textile.liquid
- install/install-keepproxy.html.textile.liquid
- - install/install-arv-git-httpd.html.textile.liquid
- install/install-crunch-dispatch.html.textile.liquid
- install/install-compute-node.html.textile.liquid
+ - install/install-shell-server.html.textile.liquid
+ - install/create-standard-objects.html.textile.liquid
+ - install/install-workbench-app.html.textile.liquid
- install/cheat_sheet.html.textile.liquid
- - Software prerequisites:
- - install/install-manual-prerequisites-ruby.html.textile.liquid
- - install/install-sso.html.textile.liquid
--- /dev/null
+{% include 'notebox_begin' %}
+As stated above, arv-copy is recursive by default and requires a working git repository in the destination cluster. If you have not yet created a repository, follow the "Adding a new repository":{{site.baseurl}}/user/tutorials/add-new-repository.html page. We will use the *tutorial* repository created there as the example.
+
+<br/>In addition, arv-copy requires git when copying to a git repository. Please make sure that git is installed and available.
+
+{% include 'notebox_end' %}
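+
+For orientation, a typical recursive copy looks something like this (a sketch only, assuming the @--src@/@--dst@/@--dst-git-repo@ options described in the arv-copy tutorial; the cluster prefixes and object UUID below are placeholders):
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src aaaaa --dst bbbbb --dst-git-repo $USER/tutorial aaaaa-p5p6p-xxxxxxxxxxxxxxx</span>
+</code></pre>
+</notextile>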
--- /dev/null
+On a Debian-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install git curl</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install git curl</span>
+</code></pre>
+</notextile>
--- /dev/null
+On a Debian-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install libpq-dev postgresql</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install postgresql postgresql-devel</span>
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+
+If you intend to use specific versions of these packages from Software Collections, you may have to adapt some of the package names to match. For example:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install postgresql92 postgresql92-postgresql-devel</span></code></pre></notextile>
+
+{% include 'notebox_end' %}
--- /dev/null
+Currently, only Ruby 2.1 is supported.
+
+h4(#rvm). *Option 1: Install with RVM*
+
+<notextile>
+<pre><code><span class="userinput">sudo gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
+\curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.1
+sudo adduser "$USER" rvm
+</span></code></pre></notextile>
+
+Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
+
+<notextile>
+<pre><code><span class="userinput">source /usr/local/rvm/scripts/rvm
+</span></code></pre></notextile>
+
+Once RVM is activated in your shell, install Bundler:
+
+<notextile>
+<pre><code>~$ <span class="userinput">gem install bundler</span>
+</code></pre></notextile>
+
+h4(#fromsource). *Option 2: Install from source*
+
+Install prerequisites for Debian 7 or 8:
+
+<notextile>
+<pre><code><span class="userinput">sudo apt-get install \
+ bison build-essential gettext libcurl3 libcurl3-gnutls \
+ libcurl4-openssl-dev libpcre3-dev libreadline-dev \
+ libssl-dev libxslt1.1 zlib1g-dev
+</span></code></pre></notextile>
+
+Install prerequisites for CentOS 6:
+
+<notextile>
+<pre><code><span class="userinput">sudo yum install \
+ libyaml-devel glibc-headers autoconf gcc-c++ glibc-devel \
+ patch readline-devel zlib-devel libffi-devel openssl-devel \
+ automake libtool bison sqlite-devel
+</span></code></pre></notextile>
+
+Install prerequisites for Ubuntu 12.04 or 14.04:
+
+<notextile>
+<pre><code><span class="userinput">sudo apt-get install \
+ gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
+ libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
+ libncurses5-dev automake libtool bison pkg-config libffi-dev
+</span></code></pre></notextile>
+
+Build and install Ruby:
+
+<notextile>
+<pre><code><span class="userinput">mkdir -p ~/src
+cd ~/src
+curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz | tar xz
+cd ruby-2.1.6
+./configure --disable-install-rdoc
+make
+sudo make install
+
+sudo gem install bundler</span>
+</code></pre></notextile>
{% include 'notebox_begin' %}
-This tutorial assumes either that you are logged into an Arvados VM instance (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
+This tutorial assumes that you are logged into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
{% include 'notebox_end' %}
h2. Install prerequisites
-The Arvados package repository includes an API server package that can help automate much of the deployment. It requires:
+The Arvados package repository includes an API server package that can help automate much of the deployment.
-* PostgreSQL 9.0+
-* "Ruby 2.1 and bundler":install-manual-prerequisites-ruby.html
-* Build tools and the curl and PostgreSQL development libraries, to build gem dependencies
-* Nginx
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#install_postgres). Install PostgreSQL
+
+{% include 'install_postgres' %}
+
+h3(#build_tools_apiserver). Build tools
On older distributions, you may need to use a backports repository to satisfy these requirements. For example, on older Red Hat-based systems, consider using the "postgresql92":https://www.softwarecollections.org/en/scls/rhscl/postgresql92/ and "nginx16":https://www.softwarecollections.org/en/scls/rhscl/nginx16/ Software Collections.
On a Debian-based system, install the following packages:
<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install bison build-essential libpq-dev libcurl4-openssl-dev postgresql git nginx arvados-api-server</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install bison build-essential libcurl4-openssl-dev git nginx arvados-api-server</span>
</code></pre>
</notextile>
On a Red Hat-based system, install the following packages:
<notextile>
-<pre><code>~$ <span class="userinput">sudo yum install bison make automake gcc gcc-c++ libcurl-devel postgresql-server postgresql-devel nginx git arvados-api-server</span>
+<pre><code>~$ <span class="userinput">sudo yum install bison make automake gcc gcc-c++ libcurl-devel nginx git arvados-api-server</span>
</code></pre>
</notextile>
-{% include 'notebox_begin' %}
-
-If you intend to use specific versions of these packages from Software Collections, you may have to adapt some of the package names to match; e.g., @postgresql92-postgresql-server postgresql92-postgresql-devel nginx16@.
-
-{% include 'notebox_end' %}
-
h2. Set up the database
Generate a new database password. Nobody ever needs to memorize it or type it, so we'll make a strong one:
<pre><code>~$ <span class="userinput">sudo mkdir -p /etc/arvados/api</span>
~$ <span class="userinput">sudo chmod 700 /etc/arvados/api</span>
~$ <span class="userinput">cd /var/www/arvados-api/current</span>
-/var/www/arvados-api/current$ <span class="userinput">sudo cp config/initializers/omniauth.rb.example /etc/arvados/api/omniauth.rb</span>
-/var/www/arvados-api/current$ <span class="userinput">sudo cp config/database.yml.sample /etc/arvados/api/database.yml</span>
+/var/www/arvados-api/current$ <span class="userinput">sudo cp config/database.yml.example /etc/arvados/api/database.yml</span>
/var/www/arvados-api/current$ <span class="userinput">sudo cp config/application.yml.example /etc/arvados/api/application.yml</span>
</code></pre>
</notextile>
Edit @/etc/arvados/api/database.yml@ and replace the @xxxxxxxx@ database password placeholders with the PostgreSQL password you generated above.
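
After editing, the @production@ stanza of @database.yml@ will look something like this (a sketch; the adapter, database, and username values come from the packaged example file and may differ in your copy):

<notextile>
<pre><code>production:
  adapter: postgresql
  encoding: utf8
  database: arvados_production
  username: arvados
  password: <span class="userinput">the password you generated above</span>
  host: localhost
</code></pre>
</notextile>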
-h2. Configure the API server
+h2(#configure_application). Configure the API server
+
+Edit @/etc/arvados/api/application.yml@ to configure the settings described in the following sections. The deployment script will consistently deploy this to the API server's configuration directory. The API server reads both @application.yml@ and its own @config/application.default.yml@ file. The settings in @application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@. The @config/application.yml.example@ file is not read by the API server and is provided as a starting template only.
-Edit @/etc/arvados/api/application.yml@ following the instructions below. The deployment script will consistently deploy this to the API server's configuration directory. The API server reads both @application.yml@ and its own @config/application.default.yml@ file. Values in @application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@. The @config/application.yml.example@ file is not read by the API server and is provided for installation convenience only.
+@config/application.default.yml@ documents additional configuration settings not listed here. You can "view the current source version":https://arvados.org/projects/arvados/repository/revisions/master/entry/services/api/config/application.default.yml for reference.
-Always put your local configuration in @application.yml@ instead of editing @application.default.yml@.
+Only put local configuration in @application.yml@. Do not edit @application.default.yml@.
h3(#uuid_prefix). uuid_prefix
-Define your @uuid_prefix@ in @application.yml@ by setting the @uuid_prefix@ field in the section for your environment. This prefix is used for all database identifiers to identify the record as originating from this site. It must be exactly 5 alphanumeric characters (lowercase ASCII letters and digits).
+Define your @uuid_prefix@ in @application.yml@ by setting the @uuid_prefix@ field in the section for your environment. This prefix is used for all database identifiers to identify the record as originating from this site. It must be exactly 5 lowercase ASCII letters and digits.
-h3(#git_repositories_dir). git_repositories_dir
+Example @application.yml@:
+
+<notextile>
+<pre><code> uuid_prefix: <span class="userinput">zzzzz</span></code></pre>
+</notextile>
-This field defaults to @/var/lib/arvados/git@. You can override the value by defining it in @application.yml@.
+h3. secret_token
-Make sure a clone of the arvados repository exists in @git_repositories_dir@.
+The @secret_token@ is used for signing cookies. IMPORTANT: This is a site secret. It should be at least 50 characters. Generate a random value and set it in @application.yml@:
<notextile>
-<pre><code>~$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git</span>
-~$ <span class="userinput">sudo git clone --bare git://git.curoverse.com/arvados.git /var/lib/arvados/git/arvados.git</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
</code></pre></notextile>
-h3. secret_token
+Example @application.yml@:
+
+<notextile>
+<pre><code> secret_token: <span class="userinput">yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy</span></code></pre>
+</notextile>
+
+h3(#blob_signing_key). blob_signing_key
-Generate a new secret token for signing cookies:
+The @blob_signing_key@ is used to enforce access control to Keep blocks. This same key must be provided to the Keepstore daemons when "installing Keepstore servers.":install-keepstore.html IMPORTANT: This is a site secret. It should be at least 50 characters. Generate a random value and set it in @application.yml@:
<notextile>
<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
</code></pre></notextile>
-Then put that value in the @secret_token@ field.
+Example @application.yml@:
-h3. blob_signing_key
+<notextile>
+<pre><code> blob_signing_key: <span class="userinput">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</span></code></pre>
+</notextile>
+
+h3(#omniauth). sso_app_secret, sso_app_id, sso_provider_url
+
+The following settings enable the API server to communicate with the "Single Sign On (SSO) server":install-sso.html to authenticate user log in.
-If you want access control on your "Keepstore":install-keepstore.html server(s), you should set @blob_signing_key@ to the same value as the permission key you provide to your Keepstore daemon(s).
+Set @sso_provider_url@ to the base URL where your SSO server is installed. This should be a URL consisting of the scheme and host (and optionally, port), without a trailing slash.
+
+Set @sso_app_secret@ and @sso_app_id@ to the corresponding values for @app_secret@ and @app_id@ used in the "Create arvados-server client for Single Sign On (SSO)":install-sso.html#client step.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code> sso_app_id: <span class="userinput">arvados-server</span>
+ sso_app_secret: <span class="userinput">wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww</span>
+ sso_provider_url: <span class="userinput">https://sso.example.com</span>
+</code></pre>
+</notextile>
h3. workbench_address
-Fill in the url of your workbench application in @workbench_address@, for example
+Set @workbench_address@ to the URL of your workbench application after following "Install Workbench.":install-workbench-app.html
+
+Example @application.yml@:
- https://workbench.@prefix_uuid@.your.domain
+<notextile>
+<pre><code> workbench_address: <span class="userinput">https://workbench.zzzzz.example.com</span></code></pre>
+</notextile>
-h3(#omniauth). sso_app_id, sso_app_secret, sso_provider_url
+h3. websockets_address
-For @sso_app_id@ and @sso_app_secret@, provide the same @app_id@ and @app_secret@ used in the "Create arvados-server client for Single Sign On (SSO)":install-sso.html#client step.
+Set @websockets_address@ to the @wss://@ URL of the API server websocket endpoint after following "Set up Web servers.":#set_up
-For @sso_provider_url@, provide the base URL where your SSO server is installed: just the scheme and host, with no trailing slash.
+Example @application.yml@:
<notextile>
-<pre><code> sso_app_id: arvados-server
- sso_app_secret: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- sso_provider_url: https://sso.example.com
+<pre><code> websockets_address: <span class="userinput">wss://ws.zzzzz.example.com</span></code></pre>
+</notextile>
+
+h3(#git_repositories_dir). git_repositories_dir
+
+The @git_repositories_dir@ setting specifies the directory where user git repositories will be stored. By default this is @/var/lib/arvados/git@.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code> git_repositories_dir: <span class="userinput">/var/lib/arvados/git</span>
</code></pre>
</notextile>
-h3. Other options
+Make sure a clone of the arvados repository exists in @git_repositories_dir@.
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git</span>
+~$ <span class="userinput">sudo git clone --bare git://git.curoverse.com/arvados.git /var/lib/arvados/git/arvados.git</span>
+</code></pre></notextile>
-Consult @/var/www/arvados-api/current/config/application.default.yml@ for a full list of configuration options. (But don't edit it. Edit @application.yml@ instead.)
+h3(#git_internal_dir). git_internal_dir
+
+The @git_internal_dir@ setting specifies the location of Arvados' internal git repository. By default this is @/var/lib/arvados/internal.git@. This repository stores git commits that have been used to run Crunch jobs. It should _not_ be a subdirectory of @git_repositories_dir@.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code> git_internal_dir: <span class="userinput">/var/lib/arvados/internal.git</span>
+</code></pre>
+</notextile>
h2. Prepare the API server deployment
This command aborts when it encounters an error. It's safe to rerun multiple times, so if there's a problem with your configuration, you can fix that and try again.
-h2. Set up Web servers
+h2(#set_up). Set up Web servers
For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend for the main API server and a Puma backend for API server Websockets. To do that:
<pre><code>#!/bin/bash
set -e
+exec 2>&1
+
# Uncomment the line below if you're using RVM.
#source /etc/profile.d/rvm.sh
-envdir="/etc/sv/puma/env"
-root=/etc/sv/puma
-echo "Starting puma from ${root}"
-cd $root
-mkdir -p "${envdir}"
-exec 2>&1
+envdir="`pwd`/env"
+mkdir -p "$envdir"
+echo ws-only > "$envdir/ARVADOS_WEBSOCKETS"
+
cd /var/www/arvados-api/current
+echo "Starting puma in `pwd`"
+
# You may need to change arguments below to match your deployment, especially -u.
-exec chpst -e "${envdir}" -m 1073741824 -u www-data:www-data bundle exec puma -t 0:512 -e production -b tcp://127.0.0.1:8100
+exec chpst -m 1073741824 -u www-data:www-data -e "$envdir" \
+ bundle exec puma -t 0:512 -e production -b tcp://127.0.0.1:8100
</code></pre>
</li>
server {
listen <span class="userinput">[your public IP address]</span>:443 ssl;
- server_name <span class="userinput">uuid-prefix.your.domain</span>;
+ server_name <span class="userinput">uuid_prefix.your.domain</span>;
ssl on;
server {
listen <span class="userinput">[your public IP address]</span>:443 ssl;
- server_name ws.<span class="userinput">uuid-prefix.your.domain</span>;
+ server_name ws.<span class="userinput">uuid_prefix.your.domain</span>;
ssl on;
title: Install Git server
...
-The arv-git-httpd server provides HTTP access to hosted git repositories, using Arvados authentication tokens instead of passwords. It is intended to be installed on the system where your git repositories are stored, and accessed through a web proxy that provides SSL support.
+The arvados-git-httpd server provides HTTP access to hosted git repositories, using Arvados authentication tokens instead of passwords. It is intended to be installed on the system where your git repositories are stored, and accessed through a web proxy that provides SSL support.
By convention, we use the following hostname for the git service:
This hostname should resolve from anywhere on the internet.
-h2. Install arv-git-httpd
+h2. Install arvados-git-httpd
On Debian-based systems:
<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install git arv-git-httpd</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install git arvados-git-httpd</span>
</code></pre>
</notextile>
On Red Hat-based systems:
<notextile>
-<pre><code>~$ <span class="userinput">sudo yum install git arv-git-httpd</span>
+<pre><code>~$ <span class="userinput">sudo yum install git arvados-git-httpd</span>
</code></pre>
</notextile>
-Verify that @arv-git-httpd@ and @git-http-backend@ are functional:
+Verify that @arvados-git-httpd@ and @git-http-backend@ are functional:
<notextile>
-<pre><code>~$ <span class="userinput">arv-git-httpd -h</span>
+<pre><code>~$ <span class="userinput">arvados-git-httpd -h</span>
Usage of arv-git-httpd:
-address="0.0.0.0:80": Address to listen on, "host:port".
-git-command="/usr/bin/git": Path to git executable. Each authenticated request will execute this program with a single argument, "http-backend".
</code></pre>
</notextile>
-We recommend running @arv-git-httpd@ under "runit":http://smarden.org/runit/ or something similar.
+We recommend running @arvados-git-httpd@ under "runit":http://smarden.org/runit/ or something similar.
Your @run@ script should look something like this:
<notextile>
<pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-exec sudo -u git arv-git-httpd -address=:9001 -git-command="$(which git)" -repo-root=<span class="userinput">/var/lib/arvados/git</span> 2>&1
+exec sudo -u git arvados-git-httpd -address=:9001 -git-command="$(which git)" -repo-root=<span class="userinput">/var/lib/arvados/git</span> 2>&1
</code></pre>
</notextile>
h3. Set up a reverse proxy with SSL support
-The arv-git-httpd service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+The arvados-git-httpd service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
-This is best achieved by putting a reverse proxy with SSL support in front of arv-git-httpd, running on port 443 and passing requests to arv-git-httpd on port 9001 (or whatever port you chose in your run script).
+This is best achieved by putting a reverse proxy with SSL support in front of arvados-git-httpd, running on port 443 and passing requests to arvados-git-httpd on port 9001 (or whatever port you chose in your run script).
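+
+For example, an nginx virtual host along these lines can terminate SSL and pass requests through (a sketch only; the hostname, certificate paths, and backend port must match your environment and run script):
+
+<notextile>
+<pre><code>server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  git.<span class="userinput">uuid_prefix.your.domain</span>;
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass http://127.0.0.1:9001;
+  }
+}
+</code></pre>
+</notextile>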
-h3. Tell the API server about the arv-git-httpd service
+h3. Tell the API server about the arvados-git-httpd service
In your API server's @config/application.yml@ file, add the following entry:
<notextile>
-<pre><code>git_http_base: git.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>git_http_base: git.<span class="userinput">uuid_prefix.your.domain</span>
</code></pre>
</notextile>
{% include 'note_python27_sc' %}
-h2. Set up Docker
+h2. Install Docker
Compute nodes must have Docker installed to run jobs inside containers. This requires a relatively recent version of Linux (at least upstream version 3.10, or a distribution version with the appropriate patches backported). Follow the "Docker Engine installation documentation":https://docs.docker.com/ for your distribution.
For Debian-based systems, the Arvados package repository includes a backported @docker.io@ package with a known-good version you can install.
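
If you use that backported package, installation is a single apt command (assuming the Arvados package repository is already configured on the node):

<notextile>
<pre><code>~$ <span class="userinput">sudo apt-get install docker.io</span>
</code></pre>
</notextile>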
+h2. Configure Docker
+
+Crunch runs jobs in Docker containers with relatively little configuration. You may need to start the Docker daemon with specific options to make sure these jobs run smoothly in your environment. This section highlights options that are useful to most installations. Refer to the "Docker daemon reference":https://docs.docker.com/reference/commandline/daemon/ for complete information about all available options.
+
+The best way to configure these options varies by distribution.
+
+* If you're using our backported @docker.io@ package, you can list these options in the @DOCKER_OPTS@ setting in @/etc/default/docker.io@.
+* If you're using another Debian-based package, you can list these options in the @DOCKER_OPTS@ setting in @/etc/default/docker@.
+* On Red Hat-based distributions, you can list these options in the @other_args@ setting in @/etc/sysconfig/docker@.
+
+h3. Default ulimits
+
+Docker containers inherit ulimits from the Docker daemon. However, the ulimits for a single Unix daemon may not accommodate a long-running Crunch job. You may want to increase default limits for compute jobs by passing @--default-ulimit@ options to the Docker daemon. For example, to allow jobs to open 10,000 files, set @--default-ulimit nofile=10000:10000@.
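+
+For example, on a Debian-based system you could raise the open-file limit through the @DOCKER_OPTS@ setting described above (a sketch; tune the values to your workload):
+
+<notextile>
+<pre><code>DOCKER_OPTS="--default-ulimit nofile=10000:10000"
+</code></pre>
+</notextile>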
+
+h3. DNS
+
+Your containers must be able to resolve the hostname in the ARVADOS_API_HOST environment variable (provided by the Crunch dispatcher) and any hostnames returned in Keep service records. If these names are not in public DNS records, you may need to set a DNS resolver for the containers by specifying the @--dns@ address with the IP address of an appropriate nameserver. You may specify this option more than once to use multiple nameservers.
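+
+For example, to have containers use two site nameservers (the addresses below are placeholders):
+
+<notextile>
+<pre><code>DOCKER_OPTS="--dns <span class="userinput">10.0.0.2</span> --dns <span class="userinput">10.0.0.3</span>"
+</code></pre>
+</notextile>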
+
h2. Set up SLURM
Install SLURM following "the same process you used to install the Crunch dispatcher":{{ site.baseurl }}/install/install-crunch-dispatch.html#slurm.
# A GNU/Linux (virtual) machine
# A working Docker installation (see "Installing Docker":https://docs.docker.com/installation/)
# A working Go installation (see "Install the Go tools":https://golang.org/doc/install)
-# A working Ruby installation (see "Install Ruby and bundler":install-manual-prerequisites-ruby.html)
+# A working Ruby installation, with the Bundler gem installed
+
+h3. Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
h2. Download the source tree
+++ /dev/null
----
-layout: default
-navsection: installguide
-title: Install Ruby and bundler
-...
-
-Currently, only Ruby 2.1 is supported.
-
-h2(#rvm). Option 1: Install with rvm
-
-<notextile>
-<pre><code>~$ <span class="userinput">gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3</span>
-~$ <span class="userinput">\curl -sSL https://get.rvm.io | bash -s stable --ruby=2.1</span>
-~$ <span class="userinput">gem install bundler
-</span></code></pre></notextile>
-
-h2(#fromsource). Option 2: Install from source
-
-<notextile>
-<pre><code><span class="userinput">mkdir -p ~/src
-cd ~/src
-wget http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz
-tar xzf ruby-2.1.6.tar.gz
-cd ruby-2.1.6
-./configure --no-rdoc --no-ri
-make
-sudo make install
-
-sudo gem install bundler</span>
-</code></pre></notextile>
</code></pre>
</notextile>
-h3. Debian
+h3. Debian and Ubuntu
-Packages are available for Debian 7 ("wheezy"). First, register the Curoverse signing key in apt's database:
+Packages are available for Debian 7 ("wheezy"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
+
+First, register the Curoverse signing key in apt's database:
{% include 'install_debian_key' %}
-Then save the configuration line listed for your version of Debian in @/etc/apt/sources.list.d/arvados.list@:
+Configure apt to retrieve packages from the Arvados package repository. This command depends on your OS vendor and version:
table(table table-bordered table-condensed).
-|*Debian version*|*@/etc/apt/sources.list.d/arvados.list@*|
-|7 ("wheezy")|@deb http://apt.arvados.org/ wheezy main@|
+|OS version|Command|
+|Debian 7 ("wheezy")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 12.04 ("precise")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ precise main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 14.04 ("trusty")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" | sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
-Finally, add this new repository to apt's database:
+{% include 'notebox_begin' %}
-<notextile>
-<pre><code>~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-</code></pre>
-</notextile>
+Arvados packages for Ubuntu may depend on third-party packages in Ubuntu's "universe" repository. If you're installing on Ubuntu, make sure you have the universe sources uncommented in @/etc/apt/sources.list@.
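+
+For example, an Ubuntu 14.04 @/etc/apt/sources.list@ entry with universe enabled looks like this (the mirror hostname varies by region):
+
+<notextile>
+<pre><code>deb http://archive.ubuntu.com/ubuntu trusty main universe
+</code></pre>
+</notextile>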
-h3. Ubuntu
-
-Packages are available for Ubuntu 12.04 ("precise"). First, register the Curoverse signing key in apt's database:
-
-{% include 'install_debian_key' %}
+{% include 'notebox_end' %}
-Then save the configuration line listed for your version of Ubuntu in @/etc/apt/sources.list.d/arvados.list@:
+Retrieve the package list:
-table(table table-bordered table-condensed).
-|*Ubuntu version*|*@/etc/apt/sources.list.d/arvados.list@*|
-|12.04 ("precise")|@deb http://apt.arvados.org/ precise main@|
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get update</span>
+</code></pre>
+</notextile>
h2. A unique identifier
-Each Arvados installation should have a globally unique identifier, which is a unique 5-character alphanumeric string. Here is a snippet of ruby that generates such a string based on the hostname of your computer:
+Each Arvados installation should have a globally unique identifier, which is a unique 5-character lowercase alphanumeric string. For testing purposes, here is one way to make a random 5-character string:
-<pre>
-Digest::MD5.hexdigest(`hostname`).to_i(16).to_s(36)[0..4]
-</pre>
+<notextile>
+<pre><code>~$ <span class="userinput">tr -dc 0-9a-z </dev/urandom | head -c5; echo</span>
+</code></pre>
+</notextile>
You may also use a different method to pick the unique identifier. The unique identifier will be part of the hostname of the services in your Arvados cluster. The rest of this documentation will refer to it as your @uuid_prefix@.
</notextile>
{% include 'note_python27_sc' %}
+
+h2. Update Git Config
+
+Configure git to use the ARVADOS_API_TOKEN environment variable to authenticate to arv-git-httpd.
+
+Execute the following commands to set up the needed configuration.
+
+<notextile>
+<pre>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.username' none</span></code>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.helper' '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'</span></code>
+</pre>
+</notextile>
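+
+With this helper in place, git commands against the cluster's HTTPS remotes authenticate with your API token automatically. For example (assuming a repository named @$USER/tutorial@ exists):
+
+<notextile>
+<pre><code>~$ <span class="userinput">git ls-remote https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</span>
+</code></pre>
+</notextile>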
h2(#dependencies). Install dependencies
-Make sure you have "Ruby and Bundler":install-manual-prerequisites-ruby.html installed.
+h3(#install_git_curl). Install git and curl
+
+{% include 'install_git_curl' %}
+
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#install_postgres). Install PostgreSQL
+
+{% include 'install_postgres' %}
h2(#install). Install SSO server
<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">bundle install</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">bundle install --without=development</span>
</code></pre></notextile>
h2. Configure the SSO server
<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
</code></pre></notextile>
-The SSO server reads the @config/application.yml@ file, as well as the @config/application.defaults.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.defaults.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience, only.
+The SSO server reads the @config/application.yml@ file, as well as the @config/application.defaults.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.defaults.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience only.
-Consult @config/application.default.yml@ for a full list of configuration options. Always put your local configuration in @config/application.yml@, never edit @config/application.default.yml@.
+Consult @config/application.default.yml@ for a full list of configuration options. Local configuration goes in @config/application.yml@; never edit @config/application.default.yml@.
h3(#uuid_prefix). uuid_prefix
-Define your @uuid_prefix@ in @config/application.yml@ by setting the @uuid_prefix@ field in the section for your environment. This prefix is used for all database identifiers to identify the record as originating from this site. It must be exactly 5 alphanumeric characters (lowercase ASCII letters and digits).
+Generate a uuid prefix for the single sign-on service. This prefix is used to identify user records as originating from this site. It must be exactly 5 lowercase ASCII letters and/or digits. You may use the following snippet to generate a uuid prefix:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
+abcde
+</code></pre></notextile>
+
+Edit @config/application.yml@ and set @uuid_prefix@ in the "common" section.
h3(#secret_token). secret_token
zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
</code></pre></notextile>
-Then put that value in the @secret_token@ field.
-
-h3(#authentication_methods). Authentication methods
-
-Three authentication methods are supported: google OAuth2, ldap, local accounts.
-
-h3(#google_oauth2). google_oauth2 authentication
-
-Google OAuth2 authentication can be configured with these options.
-
-<pre>
- # Google API tokens required for OAuth2 login.
- #
- # See https://github.com/zquestz/omniauth-google-oauth2
- #
- # and https://developers.google.com/accounts/docs/OAuth2
- google_oauth2_client_id: false
- google_oauth2_client_secret: false
-
- # Set this to your OpenId 2.0 realm to enable migration from Google OpenId
- # 2.0 to Google OAuth2 OpenId Connect (Google will provide OpenId 2.0 user
- # identifiers via the openid.realm parameter in the OAuth2 flow until 2017).
- google_openid_realm: false
-</pre>
-
-h3(#ldap). ldap authentication
-
-LDAP authentication can be configured with these options. Make sure to preserve the indentation of the fields beyond @use_ldap@.
-
-<pre>
- # Enable LDAP support.
- #
- # If you want to use LDAP, you need to provide
- # the following set of fields under the use_ldap key.
- #
- # use_ldap: false
- # title: Example LDAP
- # host: ldap.example.com
- # port: 636
- # method: ssl
- # base: "ou=Users, dc=example, dc=com"
- # uid: uid
- # email_domain: example.com
- # #bind_dn: "some_user"
- # #password: "some_password"
- use_ldap: false
-</pre>
-
-h3(#local_accounts). local account authentication
-
-If neither Google OAuth2 nor LDAP are enabled, the SSO server automatically
-falls back to local accounts. There are two configuration options for local
-accounts:
+Edit @config/application.yml@ and set @secret_token@ in the "common" section.
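+
+After both steps, the relevant part of @config/application.yml@ will look something like this sketch (placeholder values; use your own prefix and token):
+
+<pre>
+common:
+  uuid_prefix: abcde
+  secret_token: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+</pre>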
-<pre>
- # If true, allow new creation of new accounts in the SSO server's internal
- # user database.
- allow_account_registration: false
-
- # If true, send an email confirmation before activating new accounts in the
- # SSO server's internal user database.
- require_email_confirmation: false
-</pre>
+h2(#database). Set up the database
-You can also create local accounts on the SSO server from the rails console:
-
-<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
-:001 > <span class="userinput">user = User.new(:email => "test@example.com")</span>
-:002 > <span class="userinput">user.password = "passw0rd"</span>
-:003 > <span class="userinput">user.save!</span>
-:004 > <span class="userinput">quit</span>
-</code></pre>
-</notextile>
-
-h2. Set up the database
-
-Generate a new database password. Nobody ever needs to memorize it or type it, so we'll make a strong one:
+Generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
<notextile>
<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
Enter it again: <span class="userinput">paste-database-password-you-generated</span>
</code></pre></notextile>
-Configure SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.
+Configure the SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above. If you are planning a production system, update the @production@ section; otherwise use @development@.
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.sample config/database.yml</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.example config/database.yml</span>
~/sso-devise-omniauth-provider$ <span class="userinput">edit config/database.yml</span>
</code></pre></notextile>
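+
+For reference, the @production@ stanza you edit will look something like this sketch (hypothetical host; the database name and role match the @createdb@ command below):
+
+<pre>
+production:
+  adapter: postgresql
+  encoding: utf8
+  database: arvados_sso_production
+  username: arvados_sso
+  password: xxxxxxxx
+  host: localhost
+</pre>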
Alternatively, if the database user you intend to use for the SSO server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the @rake db:setup@ command above:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">su postgres createdb arvados_sso_production -E UTF8 -O arvados_sso</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:structure:load</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:schema:load</span>
~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
</code></pre></notextile>
-h2(#client). Generate assets
-
-If you are running in the production environment, you'll want to generate the assets:
-
-<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
-</code></pre>
-</notextile>
-
h2(#client). Create arvados-server client
-Use @rails console@ to create a @Client@ record that will be used by the Arvados API server. The values of @app_id@ and @app_secret@ correspond to the @APP_ID@ and @APP_SECRET@ that must be set in in "Setting up Omniauth in the API server.":install-api-server.html#omniauth
+Use @rails console@ to create a @Client@ record that will be used by the Arvados API server. The values of @app_id@ and @app_secret@ correspond to the values for @sso_app_id@ and @sso_app_secret@ in the "API server's SSO settings.":install-api-server.html#omniauth
<notextile>
<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
</code></pre>
</notextile>
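+
+Then create the @Client@ record in @rails console@. A minimal sketch (the @name@ value is arbitrary; use the random string generated above as @app_secret@):
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+:001 > <span class="userinput">c = Client.new</span>
+:002 > <span class="userinput">c.name = "arvados-server"</span>
+:003 > <span class="userinput">c.app_id = "arvados-server"</span>
+:004 > <span class="userinput">c.app_secret = "paste-the-random-string-you-generated"</span>
+:005 > <span class="userinput">c.save!</span>
+:006 > <span class="userinput">quit</span>
+</code></pre>
+</notextile>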
-h2. Start the SSO server
+h2(#assets). Precompile assets
+
+If you are running in the production environment, you must precompile the assets:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
+</code></pre>
+</notextile>
+
+h2(#authentication_methods). Authentication methods
+
+Authentication methods are configured in @application.yml@. Three authentication methods are currently supported: local accounts, LDAP, and Google+. If neither Google+ nor LDAP is enabled, the SSO server defaults to local user accounts. Only one authentication mechanism should be in use at a time.
-h3. Run a simple standalone server
+h3(#local_accounts). Local account authentication
-You can use the Webrick server that is bundled with Ruby to quickly verify that your installation is functioning:
+There are two configuration options for local accounts:
+
+<pre>
+  # If true, allow creation of new accounts in the SSO server's internal
+  # user database.
+  allow_account_registration: false
+
+  # If true, send an email confirmation before activating new accounts in the
+  # SSO server's internal user database (otherwise users are activated immediately).
+  require_email_confirmation: false
+</pre>
+
+For more information about configuring backend support for sending email (required to send email confirmations) see "Configuring Action Mailer":http://guides.rubyonrails.org/configuring.html#configuring-action-mailer
+
+If @allow_account_registration@ is false, you may manually create local accounts on the SSO server from the rails console:
<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails server</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+:001 > <span class="userinput">user = User.new(:email => "test@example.com")</span>
+:002 > <span class="userinput">user.password = "passw0rd"</span>
+:003 > <span class="userinput">user.save!</span>
+:004 > <span class="userinput">quit</span>
+</code></pre>
+</notextile>
+
+h3(#ldap). LDAP authentication
+
+The following options are available to configure LDAP authentication. Note that you must preserve the indentation of the fields listed under @use_ldap@.
+
+<pre>
+ use_ldap:
+ title: Example LDAP
+ host: ldap.example.com
+ port: 636
+ method: ssl
+ base: "ou=Users, dc=example, dc=com"
+ uid: uid
+ email_domain: example.com
+ #bind_dn: "some_user"
+ #password: "some_password"
+</pre>
+
+table(table).
+|_. Option|_. Description|
+|title |Title displayed to the user on the login page|
+|host |LDAP server hostname|
+|port |LDAP server port|
+|method|One of "plain", "ssl", "tls"|
+|base |Directory lookup base|
+|uid |User id field used for directory lookup|
+|email_domain|Strip off specified email domain from login and perform lookup on bare username|
+|bind_dn|If required by server, username to log in with before performing directory lookup|
+|password|If required by server, password to log in with before performing directory lookup|
+
+h3(#google). Google+ authentication
+
+In order to use Google+ authentication, you must use the <a href="https://console.developers.google.com" target="_blank">Google Developers Console</a> to create a set of client credentials.
+
+# Go to the <a href="https://console.developers.google.com" target="_blank">Google Developers Console</a> and select or create a project; this will take you to the project page.
+# On the sidebar, click on *APIs & auth* then select *APIs*.
+## Search for *Contacts API* and click on *Enable API*.
+## Search for *Google+ API* and click on *Enable API*.
+# On the sidebar, click on *Credentials*; under *OAuth* click on *Create new Client ID* to bring up the *Create Client ID* dialog box.
+# Under *Application type* select *Web application*.
+# If the authorization origins are not displayed, clicking on *Create Client ID* will take you to *Consent screen* settings.
+## On consent screen settings, enter the appropriate details and click on *Save*.
+## This will return you to the *Create Client ID* dialog box.
+# You must set the authorization origins. Replace @sso.your-site.com@ below with the appropriate hostname that you will use to access the SSO service:
+## JavaScript origin should be @https://sso.your-site.com/@
+## Redirect URI should be @https://sso.your-site.com/users/auth/google_oauth2/callback@
+# Copy the values of *Client ID* and *Client secret* from the Google Developers Console into the Google section of @config/application.yml@, like this:
+
+<notextile>
+<pre><code>  # Google API tokens required for OAuth2 login.
+  google_oauth2_client_id: <span class="userinput">"---YOUR---CLIENT---ID---HERE---"</span>
+  google_oauth2_client_secret: <span class="userinput">"---YOUR---CLIENT---SECRET---HERE---"</span></code></pre></notextile>
+
+h2(#start). Start the SSO server
+
+h3. Run a standalone passenger server
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production passenger start</span>
+=============== Phusion Passenger Standalone web server started ===============
+...
</code></pre>
</notextile>
-h3. Production environment
+You can now test your installation by going to the page reported by passenger as "Accessible via: ...".
+
+Note: you may safely ignore the following warning; see "this Stack Overflow answer":https://stackoverflow.com/questions/10374871/no-secret-option-provided-to-racksessioncookie-warning for details:
-As a Ruby on Rails application, the SSO server should be compatible with any Ruby application server that supports Rack applications. We recommend "Passenger":https://www.phusionpassenger.com/ to run the SSO server in production.
+<pre>
+Connecting to database specified by database.yml
+App 4574 stderr: SECURITY WARNING: No secret option provided to Rack::Session::Cookie.
+App 4574 stderr: This poses a security threat. It is strongly recommended that you
+App 4574 stderr: provide a secret to prevent exploits that may be possible from crafted
+App 4574 stderr: cookies. This will not be supported in future versions of Rack, and
+App 4574 stderr: future versions will even invalidate your existing user cookies.
+App 4574 stderr:
+App 4574 stderr: Called from: /var/lib/gems/2.1.0/gems/actionpack-3.2.8/lib/action_dispatch/middleware/session/abstract_store.rb:28:in `initialize'.
+App 4592 stdout:
+</pre>
h2. Install prerequisites
-The Arvados package repository includes Workbench server package that can help automate much of the deployment. It requires:
+The Arvados package repository includes a Workbench server package that can help automate much of the deployment.
-* "Ruby 2.1 and bundler":install-manual-prerequisites-ruby.html
-* The Arvados Python SDK
-* Graphviz
-* Build tools to build gem dependencies
-* Nginx
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#build_tools_workbench). Build tools
Workbench doesn't need its own database, so it does not need to have PostgreSQL installed.
To use the @arv@ command, you can either install the @arvados-cli@ gem via RubyGems or build and install the package from source.
-h4. Prerequisites: Ruby >= 2.1.0 and curl libraries
+h3. Prerequisites: Ruby, Bundler, and curl libraries
-Make sure you have "Ruby and bundler":{{site.baseurl}}/install/install-manual-prerequisites-ruby.html installed.
+{% include 'install_ruby_and_bundler' %}
Install curl libraries with your system's package manager. For example, on Debian or Ubuntu:
</pre>
</notextile>
-h4. Option 1: install with RubyGems
+h3. Option 1: Install with RubyGems
<notextile>
<pre>
</pre>
</notextile>
-h4. Option 2: build and install from source
+h3. Option 2: Build and install from source
<notextile>
<pre>
exit
--local Run locally using arv-run-pipeline-instance
--docker-image DOCKER_IMAGE
- Docker image to use, default arvados/jobs
+ Docker image to use, otherwise use instance default.
--ignore-rcode Commands that return non-zero return codes should not
be considered failed.
--no-reuse Do not reuse past jobs.
title: "Checking your environment"
...
-First, log into an Arvados VM instance (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or install the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation.
+First, log into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or install the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation.
Check that you are able to access the Arvados API server using @arv user current@. If it is able to access the API server, it will print out information about your account:
title: Accessing an Arvados VM with SSH - Unix Environments
...
-This document is for Unix environments (Linux, OS X, Cygwin). If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
+This document is for accessing an Arvados VM using SSH keys in Unix environments (Linux, OS X, Cygwin). If you would like to access your VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
{% include 'ssh_intro' %}
title: Accessing an Arvados VM with SSH - Windows Environments
...
-This document is for Windows environments. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
+This document is for accessing an Arvados VM using SSH keys in Windows environments. If you would like to access your VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
{% include 'ssh_intro' %}
--- /dev/null
+---
+layout: default
+navsection: userguide
+title: Accessing an Arvados VM with Webshell
+...
+
+This document describes how to access an Arvados VM with Webshell from Workbench.
+
+h2(#webshell). Access VM using webshell
+
+Webshell gives you access to an Arvados virtual machine from your browser with no additional setup.
+
+In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Manage account* to go to the account management page. In the *Manage account* page, you will see the *Virtual Machines* panel that lists the virtual machines you can access.
+
+Each row in the Virtual Machines panel lists the hostname of the VM, along with a <code>Log in as *you*</code> button under the column "Web shell beta". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/vm-access-with-webshell.png!
+
+You are now ready to work in your Arvados VM.
Access the Arvados Workbench using this link: "{{site.arvados_workbench_host}}/":{{site.arvados_workbench_host}}/ (Replace the hostname portion with the hostname of your local Arvados instance if necessary.)
-Open a shell on the system where you want to use the Arvados client. This may be your local workstation, or an Arvados virtual machine accessed with SSH (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login).
+Open a shell on the system where you want to use the Arvados client. This may be your local workstation, or an Arvados virtual machine accessed with "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or SSH (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login).
Click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access your account menu, then click on the menu item *Manage account* to go to the account management page. On the *Manage account* page, you will see the *Current Token* panel, which lists your current token and instructions to set up your environment.
--- /dev/null
+---
+layout: default
+navsection: userguide
+title: "Using arv-copy"
+...
+
+
+This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@.
+
+{% include 'tutorial_expectations' %}
+
+h2. arv-copy
+
+@arv-copy@ allows users to copy collections, pipeline templates, and pipeline instances from one cluster to another. By default, @arv-copy@ will recursively go through a template or instance and copy all dependencies associated with the object.
+
+For example, let's copy from our <a href="https://cloud.curoverse.com/">beta cloud instance *qr1hi*</a> to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangeable with any cluster name. You can find the cluster name in the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi.
+
+In order for the clusters to be able to communicate with each other, you must create a custom configuration file for each cluster. The file names must have the format *uuid_prefix.conf*, so in our example we will make two files, @qr1hi.conf@ and @dst_cluster.conf@. From your *Manage account* page in Workbench on both *qr1hi* and *dst_cluster*, copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ values.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/api-token-host.png!
+
+Copy your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ into the configuration files as shown below, working from the shell account in which you will run the commands (for example, shell.qr1hi). Put the files in @~/.config/arvados/@.
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd ~/.config/arvados</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=123456789abcdefghijkl" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=dst_cluster.arvadosapi.com" >> dst_cluster.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=987654321lkjihgfedcba" >> dst_cluster.conf</span>
+</code></pre>
+</notextile>
+
+Now you're ready to copy between *qr1hi* and *dst_cluster*!
+
+h3. How to copy a collection
+
+First, select the uuid of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or in the URL bar (the part after @collections/...@).
+
+Now copy the collection from *qr1hi* to *dst_cluster*. We will use the uuid @qr1hi-4zz18-tci4vn4fa95w0zx@ as an example. You can find this collection in the <a href="https://cloud.curoverse.com/collections/qr1hi-4zz18-tci4vn4fa95w0zx">lobSTR v.3 project on cloud.curoverse.com</a>.
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster qr1hi-4zz18-tci4vn4fa95w0zx</span>
+qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0%
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dst_cluster-4zz18-8765943210cdbae
+</code></pre>
+</notextile>
+
+The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the collection is placed in your home project in the destination cluster. If you want to place your collection in a pre-created project, specify that project with the @--project-uuid@ option followed by the project uuid.
+
+For example, this will copy the collection to project dst_cluster-j7d0g-a894213ukjhal12 in the destination cluster.
+
+<notextile> <pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx</span>
+</code></pre>
+</notextile>
+
+h3. How to copy a pipeline template or pipeline instance
+
+{% include 'arv_copy_expectations' %}
+
+We will use the uuid @qr1hi-d1hrv-nao0ohw8y7dpf84@ as an example pipeline instance.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
+ * [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
+arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-d1hrv-rym2h5ub9m8ofwj
+</code></pre>
+</notextile>
+
+New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@.
+
+By default, if you copy a pipeline instance recursively, the template and all of its dependencies will be placed in your home project.
+
+If you would like to copy the object without its dependencies, you can use the @--no-recursive@ flag.
+
+For example, we can copy the same object using this flag.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+</code></pre>
+</notextile>
h2. Additional options
-* @--docker-image IMG@ : By default, commands run inside a Docker container created from the latest "arvados/jobs" Docker image. Use this option to specify a different image to use. Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
+* @--docker-image IMG@ : By default, commands run in a container created from the @default_docker_image_for_jobs@ setting on the API server. Use this option to specify a different image to use. Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
* @--dry-run@ : Print out the final Arvados pipeline generated by @arv-run@ without submitting it.
* @--local@ : By default, the pipeline will be submitted to your configured Arvados instance. Use this option to run the command locally using @arv-run-pipeline-instance --run-jobs-here@.
* @--ignore-rcode@ : Some commands use non-zero exit codes to indicate nonfatal conditions (e.g. @grep@ returns 1 when no match is found). Set this to indicate that commands that return non-zero return codes should not be considered failed.
In the *Manage account* page, you will see the *Repositories* panel with the *Add new repository* button.
-!{{ site.baseurl }}/images/repositories-panel.png!
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/repositories-panel.png!
Click the *Add new Repository* button to open the popup to add a new arvados repository. You will see a text box where you can enter the name of the repository. Enter *tutorial* in this text box and click on *Create*.
The name you enter here must begin with a letter and can only contain alphanumeric characters.
{% include 'notebox_end' %}
-!{{ site.baseurl }}/images/add-new-repository.png!
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/add-new-repository.png!
-This will create a new repository with the name @$USER/tutorial@ with the URL <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>
+This will create a new repository with the name @$USER/tutorial@. It can be accessed using the URL <notextile><code>https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</code></notextile> or <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>
-Back in the *Repositories* panel in the *Manage account* page, you should see the @$USER/tutorial@ repository listed in the name column with the URL <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
+Back in the *Repositories* panel in the *Manage account* page, you should see the @$USER/tutorial@ repository listed in the name column with these URLs.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/added-new-repository.png!
You are now ready to use this *tutorial* repository to run your crunch scripts.
On the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Manage account* to go to the account management page.
-On the *Manage account* page, you will see *Repositories* panel. In this panel, you should see the @$USER/tutorial@ repository listed in the *name* column. Next to *name* is the column *URL*. Copy the *URL* value associated with your repository. This should look like <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
+On the *Manage account* page, you will see the *Repositories* panel. In this panel, you should see the @$USER/tutorial@ repository listed in the *name* column. Next to *name* is the column *URL*. Copy the *URL* value associated with your repository. This should look like <notextile><code>https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</code></notextile>. Alternatively, you can use <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
Next, on the Arvados virtual machine, clone your Git repository:
<notextile>
<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</span>
+~$ <span class="userinput">git clone https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</span>
Cloning into 'tutorial'...</code></pre>
</notextile>
This will create a Git repository in the directory called @tutorial@ in your home directory. Say yes when prompted to continue with connection.
Ignore any warning that you are cloning an empty repository.
+*Note:* If you are prompted for a username and password when you try to clone using this command, you may first need to update your git configuration. Execute the following commands to do so:
+
+<notextile>
+<pre>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.username' none</span></code>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.helper' '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'</span></code>
+</pre>
+</notextile>
+
{% include 'notebox_begin' %}
For more information about using Git, try
DEBIAN_IMAGE := $(shell $(DOCKER) images -q arvados/debian |head -n1)
REALCLEAN_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e arvados -e api_server -e keep_server -e keep_proxy_server -e doc_server -e workbench_server |cut -f 1 -d' ')
-REALCLEAN_IMAGES := $(shell $(DOCKER) images -q arvados/* |grep -v $(DEBIAN_IMAGE) 2>/dev/null)
-DEEPCLEAN_IMAGES := $(shell $(DOCKER) images -q arvados/*)
+# Generate a list of docker images tagged as arvados/*
+# but exclude those tagged as arvados/build
+ADI_TEMPFILE := $(shell mktemp)
+ARVADOS_DOCKER_IMAGES := $(shell $(DOCKER) images -q arvados/* |sort > $(ADI_TEMPFILE))
+ABDI_TEMPFILE := $(shell mktemp)
+ARVADOS_BUILD_DOCKER_IMAGES := $(shell $(DOCKER) images -q arvados/build |sort > $(ABDI_TEMPFILE))
+REALCLEAN_IMAGES := $(shell comm -3 $(ADI_TEMPFILE) $(ABDI_TEMPFILE) |grep -v $(DEBIAN_IMAGE) 2>/dev/null)
+DEEPCLEAN_IMAGES := $(shell comm -3 $(ADI_TEMPFILE) $(ABDI_TEMPFILE))
SKYDNS_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e crosbymichael/skydns -e crosbymichael/skydock |cut -f 1 -d' ')
SKYDNS_IMAGES := $(shell $(DOCKER) images -q crosbymichael/skyd*)
my $job_api_token;
my $no_clear_tmp;
my $resume_stash;
-my $docker_bin = "/usr/bin/docker.io";
+my $docker_bin = "docker.io";
GetOptions('force-unlock' => \$force_unlock,
'git-dir=s' => \$git_dir,
'job=s' => \$jobspec,
}
else
{
- $Job = JSON::decode_json($jobspec);
- $local_job = 1;
+ $local_job = JSON::decode_json($jobspec);
}
# at least able to run basic commands: they aren't down or severely
# misconfigured.
my $cmd = ['true'];
-if ($Job->{docker_image_locator}) {
+if (($Job || $local_job)->{docker_image_locator}) {
$cmd = [$docker_bin, 'ps', '-q'];
}
Log(undef, "Sanity check is `@$cmd`");
{
if (!$resume_stash)
{
- map { croak ("No $_ specified") unless $Job->{$_} }
+ map { croak ("No $_ specified") unless $local_job->{$_} }
qw(script script_version script_parameters);
}
- $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
- $Job->{'started_at'} = gmtime;
- $Job->{'state'} = 'Running';
+ $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
+ $local_job->{'started_at'} = gmtime;
+ $local_job->{'state'} = 'Running';
- $Job = api_call("jobs/create", job => $Job);
+ $Job = api_call("jobs/create", job => $local_job);
}
$job_id = $Job->{'uuid'};
# TODO: When #5036 is done and widely deployed, we can get rid of the
# regular expression and just unmount everything with type fuse.keep.
srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
- ['bash', '-ec', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+ ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
exit (1);
}
while (1)
freeze_if_want_freeze ($cleanpid);
select (undef, undef, undef, 0.1);
}
- Log (undef, "Cleanup command exited ".exit_status_s($?));
+ if ($?) {
+ Log(undef, "Clean work dirs: exit ".exit_status_s($?));
+ exit(EX_RETRY_UNLOCKED);
+ }
}
# If this job requires a Docker image, install that.
unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
croak("`$gitcmd rev-list` exited "
.exit_status_s($?)
- .", '$treeish' not found. Giving up.");
+ .", '$treeish' not found, giving up");
}
$commit = $1;
Log(undef, "Version $treeish is commit $commit");
$command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
}
- # Dynamically configure the container to use the host system as its
- # DNS server. Get the host's global addresses from the ip command,
- # and turn them into docker --dns options using gawk.
- $command .=
- q{$(ip -o address show scope global |
- gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
-
# The source tree and $destdir directory (which we have
# installed on the worker host) are available in the container,
# under the same path.
update_progress_stats();
select (undef, undef, undef, 0.1);
}
- elsif (time - $progress_stats_updated >= 30)
+ elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
{
update_progress_stats();
}
$progress_stats_updated = time;
return if !$progress_is_dirty;
my ($todo, $done, $running) = (scalar @jobstep_todo,
- scalar @jobstep_done,
- scalar @slot - scalar @freeslot - scalar @holdslot);
+ scalar @jobstep_done,
+ scalar keys(%proc));
$Job->{'tasks_summary'} ||= {};
$Job->{'tasks_summary'}->{'todo'} = $todo;
$Job->{'tasks_summary'}->{'done'} = $done;
--- /dev/null
+#!/bin/sh
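+# Test stub: simulates a failing work-directory cleanup command
+# (see test_fail_clean_tmp, which asserts on the message below).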
+echo >&2 Failing mount stub was called
+exit 1
--- /dev/null
+#!/bin/sh
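+# Test stub: a no-op that always succeeds (stands in for programs
+# such as docker in tests that must skip the real command).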
+true
--- /dev/null
+#!/bin/sh
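+# Test stub: exits 8 to make the Docker sanity check fail
+# (see test_fail_docker_sanity_check).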
+exit 8
--- /dev/null
+#!/bin/sh
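+# Test stub: exits 7 to make the basic sanity check fail
+# (see test_fail_sanity_check).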
+exit 7
end
def test_small_collection
- skip "Waiting unitl #4534 is implemented"
-
uuid = Digest::MD5.hexdigest(foo_manifest) + '+' + foo_manifest.size.to_s
out, err = capture_subprocess_io do
assert_arv('--format', 'uuid', 'collection', 'create', '--collection', {
end
def test_file_to_dev_stdout
- skip "Waiting unitl #4534 is implemented"
-
test_file_to_stdout('/dev/stdout')
end
def test_file_to_stdout(specify_stdout_as='-')
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
end
end
def test_file_to_file
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
end
def test_file_to_file_no_overwrite_file
- skip "Waiting unitl #4534 is implemented"
File.open './tmp/foo', 'wb' do |f|
f.write 'baz'
end
out, err = capture_subprocess_io do
assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
end
- assert_match /Error:/, err
+ assert_match /Local file tmp\/foo already exists/, err
assert_equal '', out
assert_equal 'baz', IO.read('tmp/foo')
end
def test_file_to_file_no_overwrite_file_in_dir
- skip "Waiting unitl #4534 is implemented"
File.open './tmp/foo', 'wb' do |f|
f.write 'baz'
end
out, err = capture_subprocess_io do
assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
end
- assert_match /Error:/, err
+ assert_match /Local file tmp\/foo already exists/, err
assert_equal '', out
assert_equal 'baz', IO.read('tmp/foo')
end
def test_file_to_file_force_overwrite
- skip "Waiting unitl #4534 is implemented"
-
File.open './tmp/foo', 'wb' do |f|
f.write 'baz'
end
end
def test_file_to_file_skip_existing
- skip "Waiting unitl #4534 is implemented"
-
File.open './tmp/foo', 'wb' do |f|
f.write 'baz'
end
end
def test_file_to_dir
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
end
def test_nonexistent_block
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
- assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6'
+ assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
end
assert_equal '', out
assert_match /Error:/, err
end
def test_nonexistent_manifest
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
- assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6/', 'tmp/'
+ assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
end
assert_equal '', out
assert_match /Error:/, err
end
def test_manifest_root_to_dir
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
end
def test_manifest_root_to_dir_noslash
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
end
def test_display_md5sum
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
end
def test_md5sum_nowrite
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
end
def test_sha1_nowrite
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
end
def test_block_to_file
- skip "Waiting unitl #4534 is implemented"
-
remove_tmp_foo
out, err = capture_subprocess_io do
assert_arv_get @@foo_manifest_locator, 'tmp/foo'
end
def test_create_directory_tree
- skip "Waiting unitl #4534 is implemented"
-
`rm -rf ./tmp/arv-get-test/`
Dir.mkdir './tmp/arv-get-test'
out, err = capture_subprocess_io do
end
def test_create_partial_directory_tree
- skip "Waiting unitl #4534 is implemented"
-
`rm -rf ./tmp/arv-get-test/`
Dir.mkdir './tmp/arv-get-test'
out, err = capture_subprocess_io do
end
def test_raw_stdin
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
r,w = IO.pipe
wpid = fork do
end
def test_raw_file
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert arv_put('--raw', './tmp/foo')
end
end
def test_raw_empty_file
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert arv_put('--raw', './tmp/empty_file')
end
end
def test_filename_arg_with_empty_file
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert arv_put('--filename', 'foo', './tmp/empty_file')
end
end
def test_as_stream
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert arv_put('--as-stream', './tmp/foo')
end
end
def test_progress
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert arv_put('--manifest', '--progress', './tmp/foo')
end
end
def test_batch_progress
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
assert arv_put('--manifest', '--batch-progress', './tmp/foo')
end
end
def test_read_from_implicit_stdin
- skip "Waiting unitl #4534 is implemented"
-
test_read_from_stdin(specify_stdin_as='--manifest')
end
def test_read_from_dev_stdin
- skip "Waiting unitl #4534 is implemented"
-
test_read_from_stdin(specify_stdin_as='/dev/stdin')
end
def test_read_from_stdin(specify_stdin_as='-')
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
r,w = IO.pipe
wpid = fork do
end
def test_read_from_implicit_stdin_implicit_manifest
- skip "Waiting unitl #4534 is implemented"
-
test_read_from_stdin_implicit_manifest(specify_stdin_as=nil,
expect_filename='stdin')
end
def test_read_from_dev_stdin_implicit_manifest
- skip "Waiting unitl #4534 is implemented"
-
test_read_from_stdin_implicit_manifest(specify_stdin_as='/dev/stdin')
end
def test_read_from_stdin_implicit_manifest(specify_stdin_as='-',
expect_filename=nil)
- skip "Waiting unitl #4534 is implemented"
-
expect_filename = expect_filename || specify_stdin_as.split('/').last
out, err = capture_subprocess_io do
r,w = IO.pipe
end
def test_run_pipeline_instance_get_help
- skip "Waiting unitl #4534 is implemented"
-
out, err = capture_subprocess_io do
system ('arv-run-pipeline-instance -h')
end
class TestArvTag < Minitest::Test
def test_no_args
- skip "Waiting unitl #4534 is implemented"
+ skip "Waiting until #4534 is implemented"
# arv-tag exits with failure if run with no args
out, err = capture_subprocess_io do
--- /dev/null
+require 'minitest/autorun'
+
+class TestCrunchJob < Minitest::Test
+ SPECIAL_EXIT = {
+ EX_RETRY_UNLOCKED: 93,
+ EX_TEMPFAIL: 75,
+ }
+
+ JOBSPEC = {
+ grep_local: {
+ script: 'grep',
+ script_version: 'master',
+ repository: File.absolute_path('../../../..', __FILE__),
+ script_parameters: {foo: 'bar'},
+ },
+ }
+
+ def setup
+ end
+
+ def crunchjob
+ File.absolute_path '../../bin/crunch-job', __FILE__
+ end
+
+ # Return environment suitable for running crunch-job.
+ def crunchenv opts={}
+ env = ENV.to_h
+ env['CRUNCH_REFRESH_TRIGGER'] =
+ File.absolute_path('../../../../tmp/crunch-refresh-trigger', __FILE__)
+ env
+ end
+
+ def jobspec label
+ JOBSPEC[label].dup
+ end
+
+ # Encode job record to json and run it with crunch-job.
+ #
+ # opts[:binstubs] is an array of X where ./binstub_X is added to
+ # PATH in order to mock system programs.
+ def tryjobrecord jobrecord, opts={}
+ env = crunchenv
+ (opts[:binstubs] || []).each do |binstub|
+ env['PATH'] = File.absolute_path('../binstub_'+binstub, __FILE__) + ':' + env['PATH']
+ end
+ system env, crunchjob, '--job', jobrecord.to_json
+ end
+
+ def test_bogus_json
+ out, err = capture_subprocess_io do
+ system crunchenv, crunchjob, '--job', '"}{"'
+ end
+ assert_equal false, $?.success?
+ # Must not conflict with our special exit statuses
+ assert_jobfail $?
+ assert_match /JSON/, err
+ end
+
+ def test_fail_sanity_check
+ out, err = capture_subprocess_io do
+ j = {}
+ tryjobrecord j, binstubs: ['sanity_check']
+ end
+ assert_equal 75, $?.exitstatus
+ assert_match /Sanity check failed: 7/, err
+ end
+
+ def test_fail_docker_sanity_check
+ out, err = capture_subprocess_io do
+ j = {}
+ j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+ tryjobrecord j, binstubs: ['sanity_check']
+ end
+ assert_equal 75, $?.exitstatus
+ assert_match /Sanity check failed: 8/, err
+ end
+
+ def test_no_script_specified
+ out, err = capture_subprocess_io do
+ j = jobspec :grep_local
+ j.delete :script
+ tryjobrecord j
+ end
+ assert_match /No script specified/, err
+ assert_jobfail $?
+ end
+
+ def test_fail_clean_tmp
+ out, err = capture_subprocess_io do
+ j = jobspec :grep_local
+ tryjobrecord j, binstubs: ['clean_fail']
+ end
+ assert_match /Failing mount stub was called/, err
+ assert_match /Clean work dirs: exit 1\n$/, err
+ assert_equal SPECIAL_EXIT[:EX_RETRY_UNLOCKED], $?.exitstatus
+ end
+
+ def test_docker_image_missing
+ skip 'API bug: it refuses to create this job in Running state'
+ out, err = capture_subprocess_io do
+ j = jobspec :grep_local
+ j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+ tryjobrecord j, binstubs: ['docker_noop']
+ end
+ assert_match /No Docker image hash found from locator/, err
+ assert_jobfail $?
+ end
+
+ def test_script_version_not_found_in_repository
+ bogus_version = 'f8b72707c1f5f740dbf1ed56eb429a36e0dee770'
+ out, err = capture_subprocess_io do
+ j = jobspec :grep_local
+ j[:script_version] = bogus_version
+ tryjobrecord j
+ end
+ assert_match /'#{bogus_version}' not found, giving up/, err
+ assert_jobfail $?
+ end
+
+ # Ensure procstatus is not interpreted as a temporary infrastructure
+ # problem. Would be assert_http_4xx if this were http.
+ def assert_jobfail procstatus
+ refute_includes SPECIAL_EXIT.values, procstatus.exitstatus
+ assert_equal false, procstatus.success?
+ end
+end
--- /dev/null
+../python/.gitignore
\ No newline at end of file
--- /dev/null
+Arvados Common Workflow Language (CWL) runner.
--- /dev/null
+#!/usr/bin/env python
+
+import argparse
+import arvados
+import arvados.events
+import arvados.commands.keepdocker
+import arvados.commands.run
+import cwltool.draft2tool
+import cwltool.workflow
+import cwltool.main
+import threading
+import cwltool.docker
+import fnmatch
+import logging
+import re
+import os
+from cwltool.process import get_feature
+
+logger = logging.getLogger('arvados.cwl-runner')
+logger.setLevel(logging.INFO)
+
+def arv_docker_get_image(api_client, dockerRequirement, pull_image):
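+    # Make sure the image named by the tool's DockerRequirement is available
+    # in Arvados: look it up with keepdocker, and if it is not found, pull it
+    # locally via cwltool and upload it with arv-keepdocker.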
+ if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
+ dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
+
+ sp = dockerRequirement["dockerImageId"].split(":")
+ image_name = sp[0]
+ image_tag = sp[1] if len(sp) > 1 else None
+
+ images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
+ image_name=image_name,
+ image_tag=image_tag)
+
+ if not images:
+ imageId = cwltool.docker.get_image(dockerRequirement, pull_image)
+ args = [image_name]
+ if image_tag:
+ args.append(image_tag)
+ arvados.commands.keepdocker.main(args)
+
+ return dockerRequirement["dockerImageId"]
+
+class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
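+    # File access shim for cwltool: paths whose first component is a Keep
+    # locator are resolved inside the corresponding Arvados collection;
+    # anything else falls back to the local filesystem.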
+ def __init__(self, basedir):
+ self.collections = {}
+ self.basedir = basedir
+
+ def get_collection(self, path):
+ p = path.split("/")
+ if arvados.util.keep_locator_pattern.match(p[0]):
+ if p[0] not in self.collections:
+ self.collections[p[0]] = arvados.collection.CollectionReader(p[0])
+ return (self.collections[p[0]], "/".join(p[1:]))
+ else:
+ return (None, path)
+
+ def _match(self, collection, patternsegments, parent):
+ ret = []
+ for filename in collection:
+ if fnmatch.fnmatch(filename, patternsegments[0]):
+ cur = os.path.join(parent, filename)
+ if len(patternsegments) == 1:
+ ret.append(cur)
+ else:
+ ret.extend(self._match(collection[filename], patternsegments[1:], cur))
+ return ret
+
+ def glob(self, pattern):
+ collection, rest = self.get_collection(pattern)
+ patternsegments = rest.split("/")
+ return self._match(collection, patternsegments, collection.manifest_locator())
+
+ def open(self, fn, mode):
+ collection, rest = self.get_collection(fn)
+ if collection:
+ return collection.open(rest, mode)
+ else:
+ return open(self._abs(fn), mode)
+
+ def exists(self, fn):
+ collection, rest = self.get_collection(fn)
+ if collection:
+ return collection.exists(rest)
+ else:
+ return os.path.exists(self._abs(fn))
+
+class ArvadosJob(object):
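+    # Wraps a single CWL command-line step as an Arvados job: run() submits
+    # a run-command job via the jobs API, and done() reports the finished
+    # job's outputs and status back to cwltool.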
+ def __init__(self, runner):
+ self.arvrunner = runner
+ self.running = False
+
+ def run(self, dry_run=False, pull_image=True, **kwargs):
+ script_parameters = {
+ "command": self.command_line
+ }
+ runtime_constraints = {}
+
+ if self.generatefiles:
+ vwd = arvados.collection.Collection()
+ for t in self.generatefiles:
+ if isinstance(self.generatefiles[t], dict):
+ src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
+ vwd.copy(rest, t, source_collection=src)
+ else:
+ with vwd.open(t, "w") as f:
+ f.write(self.generatefiles[t])
+ vwd.save_new()
+ script_parameters["task.vwd"] = vwd.portable_data_hash()
+
+ script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
+ if self.environment:
+ script_parameters["task.env"].update(self.environment)
+
+ if self.stdin:
+ script_parameters["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]
+
+ if self.stdout:
+ script_parameters["task.stdout"] = self.stdout
+
+ (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
+ if docker_req and kwargs.get("use_container") is not False:
+ runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
+ runtime_constraints["arvados_sdk_version"] = "master"
+
+ response = self.arvrunner.api.jobs().create(body={
+ "script": "run-command",
+ "repository": "arvados",
+ "script_version": "master",
+ "script_parameters": script_parameters,
+ "runtime_constraints": runtime_constraints
+ }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+
+ self.arvrunner.jobs[response["uuid"]] = self
+
+ logger.info("Job %s is %s", response["uuid"], response["state"])
+
+ if response["state"] in ("Complete", "Failed", "Cancelled"):
+ self.done(response)
+
+ def done(self, record):
+ try:
+ if record["state"] == "Complete":
+ processStatus = "success"
+ else:
+ processStatus = "permanentFail"
+
+ try:
+ outputs = {}
+ outputs = self.collect_outputs(record["output"])
+ except Exception as e:
+ logger.warn(str(e))
+ processStatus = "permanentFail"
+
+ self.output_callback(outputs, processStatus)
+ finally:
+ del self.arvrunner.jobs[record["uuid"]]
+
+class ArvPathMapper(cwltool.pathmapper.PathMapper):
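+    # Maps CWL input file paths into Keep: paths that already name a portable
+    # data hash are used as-is under /keep, and local files are uploaded first.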
+ def __init__(self, arvrunner, referenced_files, basedir, **kwargs):
+ self._pathmap = {}
+ uploadfiles = []
+
+ pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+/.+')
+
+ for src in referenced_files:
+ if isinstance(src, basestring) and pdh_path.match(src):
+ self._pathmap[src] = (src, "/keep/%s" % src)
+ else:
+ ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+ st = arvados.commands.run.statfile("", ab)
+ if kwargs.get("conformance_test"):
+ self._pathmap[src] = (src, ab)
+ elif isinstance(st, arvados.commands.run.UploadFile):
+ uploadfiles.append((src, ab, st))
+ elif isinstance(st, arvados.commands.run.ArvFile):
+ self._pathmap[src] = (ab, st.fn)
+ else:
+ raise cwltool.workflow.WorkflowException("Input file path '%s' is invalid" % st)
+
+ if uploadfiles:
+ arvados.commands.run.uploadfiles([u[2] for u in uploadfiles], arvrunner.api, dry_run=kwargs.get("dry_run"), num_retries=3)
+
+ for src, ab, st in uploadfiles:
+ self._pathmap[src] = (ab, st.fn)
+
+
+
+class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
+ def __init__(self, arvrunner, toolpath_object, **kwargs):
+ super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+ self.arvrunner = arvrunner
+
+ def makeJobRunner(self):
+ return ArvadosJob(self.arvrunner)
+
+ def makePathMapper(self, reffiles, input_basedir, **kwargs):
+ return ArvPathMapper(self.arvrunner, reffiles, input_basedir, **kwargs)
+
+
+class ArvCwlRunner(object):
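+    # Executes a CWL workflow by submitting each runnable step as an Arvados
+    # job, tracking job state through the events (websocket) API.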
+ def __init__(self, api_client):
+ self.api = api_client
+ self.jobs = {}
+ self.lock = threading.Lock()
+ self.cond = threading.Condition(self.lock)
+ self.final_output = None
+
+ def arvMakeTool(self, toolpath_object, **kwargs):
+ if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
+ return ArvadosCommandTool(self, toolpath_object, **kwargs)
+ else:
+ return cwltool.workflow.defaultMakeTool(toolpath_object, **kwargs)
+
+ def output_callback(self, out, processStatus):
+ if processStatus == "success":
+ logger.info("Overall job status is %s", processStatus)
+ else:
+ logger.warn("Overall job status is %s", processStatus)
+ self.final_output = out
+
+ def on_message(self, event):
+ if "object_uuid" in event:
+ if event["object_uuid"] in self.jobs and event["event_type"] == "update":
+ if event["properties"]["new_attributes"]["state"] == "Running" and self.jobs[event["object_uuid"]].running is False:
+ logger.info("Job %s is Running", event["object_uuid"])
+ with self.lock:
+ self.jobs[event["object_uuid"]].running = True
+ elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled"):
+ logger.info("Job %s is %s", event["object_uuid"], event["properties"]["new_attributes"]["state"])
+ try:
+ self.cond.acquire()
+ self.jobs[event["object_uuid"]].done(event["properties"]["new_attributes"])
+ self.cond.notify()
+ finally:
+ self.cond.release()
+
+ def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
+ events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
+
+ self.fs_access = CollectionFsAccess(input_basedir)
+
+ kwargs["fs_access"] = self.fs_access
+ kwargs["enable_reuse"] = args.enable_reuse
+
+ if kwargs.get("conformance_test"):
+ return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
+ else:
+ jobiter = tool.job(job_order,
+ input_basedir,
+ self.output_callback,
+ **kwargs)
+
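+            # jobiter yields a runnable job when one is ready to submit,
+            # or None while the workflow is blocked on pending jobs; in
+            # the latter case, wait on the condition variable until
+            # on_message() reports a job state change.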
+ for runnable in jobiter:
+ if runnable:
+ with self.lock:
+ runnable.run(**kwargs)
+ else:
+ if self.jobs:
+ try:
+ self.cond.acquire()
+ self.cond.wait()
+ finally:
+ self.cond.release()
+ else:
+ logger.error("Workflow cannot make any more progress.")
+ break
+
+ while self.jobs:
+ try:
+ self.cond.acquire()
+ self.cond.wait()
+ finally:
+ self.cond.release()
+
+ events.close()
+
+ if self.final_output is None:
+ raise cwltool.workflow.WorkflowException("Workflow did not return a result.")
+
+ return self.final_output
+
+
+def main(args, stdout, stderr, api_client=None):
+    runner = ArvCwlRunner(api_client=api_client if api_client else arvados.api('v1'))
+ args.append("--leave-outputs")
+ parser = cwltool.main.arg_parser()
+ exgroup = parser.add_mutually_exclusive_group()
+    exgroup.add_argument("--enable-reuse", action="store_true",
+                        default=False, dest="enable_reuse",
+                        help="Enable job reuse (submit with find_or_create so matching existing jobs are reused)")
+    exgroup.add_argument("--disable-reuse", action="store_false",
+                        default=False, dest="enable_reuse",
+                        help="Disable job reuse (always submit new jobs; this is the default)")
+
+ return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
--- /dev/null
+#!/usr/bin/env python
+
+import sys
+
+from arvados_cwl import main
+
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
--- /dev/null
+../python/gittaggers.py
\ No newline at end of file
--- /dev/null
+#!/usr/bin/env python
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+
+from setuptools import setup, find_packages
+
+SETUP_DIR = os.path.dirname(__file__) or '.'
+README = os.path.join(SETUP_DIR, 'README.rst')
+
+try:
+ import gittaggers
+ tagger = gittaggers.EggInfoFromGit
+except ImportError:
+ tagger = egg_info_cmd.egg_info
+
+setup(name='arvados-cwl-runner',
+ version='1.0',
+ description='Arvados Common Workflow Language runner',
+ long_description=open(README).read(),
+ author='Arvados',
+ author_email='info@arvados.org',
+ url="https://arvados.org",
+ download_url="https://github.com/curoverse/arvados.git",
+ license='Apache 2.0',
+ packages=find_packages(),
+ scripts=[
+ 'bin/cwl-runner'
+ ],
+ install_requires=[
+ 'cwltool',
+ 'arvados-python-client'
+ ],
+ zip_safe=True,
+ cmdclass={'egg_info': tagger},
+ )
WriteMakefile(
NAME => 'Arvados',
- VERSION_FROM => 'lib/Arvados.pm'
+ VERSION_FROM => 'lib/Arvados.pm',
+ PREREQ_PM => {
+ 'JSON' => 0,
+ 'LWP' => 0,
+ 'Net::SSL' => 0,
+ },
);
abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
return client
+# Check if git is available
+def check_git_availability():
+ try:
+ arvados.util.run_command(['git', '--help'])
+ except Exception:
+ abort('git command is not available. Please ensure git is installed.')
+
# copy_pipeline_instance(pi_uuid, src, dst, args)
#
# Copies a pipeline instance identified by pi_uuid from src to dst.
pi = src.pipeline_instances().get(uuid=pi_uuid).execute(num_retries=args.retries)
if args.recursive:
+ check_git_availability()
+
if not args.dst_git_repo:
abort('--dst-git-repo is required when copying a pipeline recursively.')
# Copy the pipeline template and save the copied template.
pt = src.pipeline_templates().get(uuid=pt_uuid).execute(num_retries=args.retries)
if args.recursive:
+ check_git_availability()
+
if not args.dst_git_repo:
abort('--dst-git-repo is required when copying a pipeline recursively.')
# Copy input collections, docker images and git repos.
obj = arvados.util.portable_data_hash_pattern.sub(copy_collection_fn, obj)
obj = arvados.util.collection_uuid_pattern.sub(copy_collection_fn, obj)
return obj
- elif type(obj) == dict:
+ elif isinstance(obj, dict):
return {v: copy_collections(obj[v], src, dst, args) for v in obj}
- elif type(obj) == list:
+ elif isinstance(obj, list):
return [copy_collections(v, src, dst, args) for v in obj]
return obj
args = arg_parser.parse_args(arguments)
if len(args.paths) == 0:
- args.paths += ['/dev/stdin']
+ args.paths = ['-']
+
+ args.paths = map(lambda x: "-" if x == "/dev/stdin" else x, args.paths)
if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
if args.filename:
args.progress = True
if args.paths == ['-']:
- args.paths = ['/dev/stdin']
+ args.resume = False
if not args.filename:
- args.filename = '-'
+ args.filename = 'stdin'
return args
writer.report_progress()
writer.do_queued_work() # Do work resumed from cache.
for path in args.paths: # Copy file data to Keep.
- if os.path.isdir(path):
+ if path == '-':
+ writer.start_new_stream()
+ writer.start_new_file(args.filename)
+ r = sys.stdin.read(64*1024)
+ while r:
+                # Bypass the _queued_file check in ResumableCollectionWriter.write()
+                # and call CollectionWriter.write() directly.
+ super(arvados.collection.ResumableCollectionWriter, writer).write(r)
+ r = sys.stdin.read(64*1024)
+ elif os.path.isdir(path):
writer.write_directory_tree(
path, max_manifest_depth=args.max_manifest_depth)
else:
import arvados.commands._util as arv_cmd
logger = logging.getLogger('arvados.arv-run')
+logger.setLevel(logging.INFO)
arvrun_parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
arvrun_parser.add_argument('--dry-run', action="store_true", help="Print out the pipeline that would be submitted and exit")
arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-run-pipeline-instance")
-arvrun_parser.add_argument('--docker-image', type=str, default="arvados/jobs", help="Docker image to use, default arvados/jobs")
+arvrun_parser.add_argument('--docker-image', type=str, help="Docker image to use, otherwise use instance default.")
arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Commands that return non-zero return codes should not be considered failed.")
arvrun_parser.add_argument('--no-reuse', action="store_true", help="Do not reuse past jobs.")
arvrun_parser.add_argument('--no-wait', action="store_true", help="Do not wait and display logs after submitting command, just exit.")
return prefix+fn
+def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
+ # Find the smallest path prefix that includes all the files that need to be uploaded.
+ # This starts at the root and iteratively removes common parent directory prefixes
+    # until all file paths no longer have a common parent.
+ n = True
+ pathprefix = "/"
+ while n:
+ pathstep = None
+ for c in files:
+ if pathstep is None:
+ sp = c.fn.split('/')
+ if len(sp) < 2:
+ # no parent directories left
+ n = False
+ break
+ # path step takes next directory
+ pathstep = sp[0] + "/"
+ else:
+ # check if pathstep is common prefix for all files
+ if not c.fn.startswith(pathstep):
+ n = False
+ break
+ if n:
+ # pathstep is common parent directory for all files, so remove the prefix
+ # from each path
+ pathprefix += pathstep
+ for c in files:
+ c.fn = c.fn[len(pathstep):]
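+
+    # Illustrative example: for files "a/b/one.txt" and "a/b/two.txt" the
+    # loop strips "a/" and then "b/", leaving pathprefix "/a/b/" and file
+    # names "one.txt" and "two.txt". A third file "c/three.txt" would stop
+    # the stripping immediately, because the first path components differ.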
+
+ orgdir = os.getcwd()
+ os.chdir(pathprefix)
+
+ logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
+
+ if dry_run:
+ logger.info("$(input) is %s", pathprefix.rstrip('/'))
+ pdh = "$(input)"
+ else:
+ files = sorted(files, key=lambda x: x.fn)
+ collection = arvados.CollectionWriter(api, num_retries=num_retries)
+ stream = None
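+        # Group files by directory: each distinct directory becomes one
+        # manifest stream in the output collection.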
+ for f in files:
+ sp = os.path.split(f.fn)
+ if sp[0] != stream:
+ stream = sp[0]
+ collection.start_new_stream(stream)
+ collection.write_file(f.fn, sp[1])
+ item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
+ pdh = item["portable_data_hash"]
+ logger.info("Uploaded to %s", item["uuid"])
+
+ for c in files:
+ c.fn = "$(file %s/%s)" % (pdh, c.fn)
+
+ os.chdir(orgdir)
+
+
def main(arguments=None):
args = arvrun_parser.parse_args(arguments)
command[i] = statfile(m.group(1), m.group(2))
break
- n = True
- pathprefix = "/"
files = [c for command in slots[1:] for c in command if isinstance(c, UploadFile)]
- if len(files) > 0:
- # Find the smallest path prefix that includes all the files that need to be uploaded.
- # This starts at the root and iteratively removes common parent directory prefixes
- # until all file pathes no longer have a common parent.
- while n:
- pathstep = None
- for c in files:
- if pathstep is None:
- sp = c.fn.split('/')
- if len(sp) < 2:
- # no parent directories left
- n = False
- break
- # path step takes next directory
- pathstep = sp[0] + "/"
- else:
- # check if pathstep is common prefix for all files
- if not c.fn.startswith(pathstep):
- n = False
- break
- if n:
- # pathstep is common parent directory for all files, so remove the prefix
- # from each path
- pathprefix += pathstep
- for c in files:
- c.fn = c.fn[len(pathstep):]
-
- orgdir = os.getcwd()
- os.chdir(pathprefix)
-
- print("Upload local files: \"%s\"" % '" "'.join([c.fn for c in files]))
-
- if args.dry_run:
- print("$(input) is %s" % pathprefix.rstrip('/'))
- pdh = "$(input)"
- else:
- files = sorted(files, key=lambda x: x.fn)
- collection = arvados.CollectionWriter(api, num_retries=args.retries)
- stream = None
- for f in files:
- sp = os.path.split(f.fn)
- if sp[0] != stream:
- stream = sp[0]
- collection.start_new_stream(stream)
- collection.write_file(f.fn, sp[1])
- item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
- pdh = item["portable_data_hash"]
- print "Uploaded to %s" % item["uuid"]
-
- for c in files:
- c.fn = "$(file %s/%s)" % (pdh, c.fn)
-
- os.chdir(orgdir)
+ if files:
+        uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.retries, project=project)
for i in xrange(1, len(slots)):
slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
"repository": args.repository,
"script_parameters": {
},
- "runtime_constraints": {
- "docker_image": args.docker_image
- }
+ "runtime_constraints": {}
}
+ if args.docker_image:
+ component["runtime_constraints"]["docker_image"] = args.docker_image
+
task_foreach = []
group_parser = argparse.ArgumentParser()
group_parser.add_argument('-b', '--batch-size', type=int)
else:
pipeline["owner_uuid"] = project
pi = api.pipeline_instances().create(body=pipeline, ensure_unique_name=True).execute()
- print "Running pipeline %s" % pi["uuid"]
+ logger.info("Running pipeline %s", pi["uuid"])
if args.local:
subprocess.call(["arv-run-pipeline-instance", "--instance", pi["uuid"], "--run-jobs-here"] + (["--no-reuse"] if args.no_reuse else []))
ws.main(["--pipeline", pi["uuid"]])
pi = api.pipeline_instances().get(uuid=pi["uuid"]).execute()
- print "Pipeline is %s" % pi["state"]
+ logger.info("Pipeline is %s", pi["state"])
if "output_uuid" in pi["components"]["command"]:
- print "Output is %s" % pi["components"]["command"]["output_uuid"]
+ logger.info("Output is %s", pi["components"]["command"]["output_uuid"])
else:
- print "No output"
+ logger.info("No output")
if __name__ == '__main__':
main()
parser = argparse.ArgumentParser()
parser.add_argument('-u', '--uuid', type=str, default="", help="Filter events on object_uuid")
parser.add_argument('-f', '--filters', type=str, default="", help="Arvados query filter to apply to log events (JSON encoded)")
+    parser.add_argument('-s', '--start-time', type=str, default="", help="Fetch log events created at or after this time (interpreted as server time, UTC). Allowed formats: YYYY-MM-DD or YYYY-MM-DD hh:mm:ss")
group = parser.add_mutually_exclusive_group()
group.add_argument('--poll-interval', default=15, type=int, help="If websockets is not available, specify the polling interval, default is every 15 seconds")
if args.pipeline:
filters += [ ['object_uuid', '=', args.pipeline] ]
+ if args.start_time:
+ last_log_id = 1
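+        # last_log_id=1 tells subscribe() to replay the log table from
+        # the beginning; the created_at filter below narrows the results
+        # to the requested time window.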
+ filters += [ ['created_at', '>=', args.start_time] ]
+ else:
+ last_log_id = None
+
def on_message(ev):
global filters
global ws
print json.dumps(ev)
try:
- ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval)
+ ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval, last_log_id=last_log_id)
if ws:
if args.pipeline:
c = api.pipeline_instances().get(uuid=args.pipeline).execute()
_logger = logging.getLogger('arvados.events')
class EventClient(WebSocketClient):
- def __init__(self, url, filters, on_event):
+ def __init__(self, url, filters, on_event, last_log_id):
ssl_options = {'ca_certs': arvados.util.ca_certs_path()}
if config.flag_is_true('ARVADOS_API_HOST_INSECURE'):
ssl_options['cert_reqs'] = ssl.CERT_NONE
super(EventClient, self).__init__(url, ssl_options=ssl_options)
self.filters = filters
self.on_event = on_event
+ self.stop = threading.Event()
+ self.last_log_id = last_log_id
def opened(self):
- self.subscribe(self.filters)
+ self.subscribe(self.filters, self.last_log_id)
def received_message(self, m):
self.on_event(json.loads(str(m)))
- def close_connection(self):
- try:
- self.sock.shutdown(socket.SHUT_RDWR)
- self.sock.close()
- except:
- pass
+ def closed(self, code, reason=None):
+ self.stop.set()
+
+ def close(self, code=1000, reason=''):
+ """Close event client and wait for it to finish."""
+
+        # The parent close() method sends an asynchronous "closed" event to the server.
+ super(EventClient, self).close(code, reason)
+
+        # If the server doesn't respond by finishing the close handshake, we
+        # would be stuck in limbo forever. There is no need to wait for the
+        # server's response before actually closing the socket.
+ self.close_connection()
+
+        # Wait for the websocket thread to finish up (closed() is called by
+        # the websocket thread as part of terminate()).
+ while not self.stop.is_set():
+ self.stop.wait(1)
def subscribe(self, filters, last_log_id=None):
m = {"method": "subscribe", "filters": filters}
self.send(json.dumps({"method": "unsubscribe", "filters": filters}))
class PollClient(threading.Thread):
- def __init__(self, api, filters, on_event, poll_time):
+ def __init__(self, api, filters, on_event, poll_time, last_log_id):
super(PollClient, self).__init__()
self.api = api
if filters:
self.poll_time = poll_time
self.daemon = True
self.stop = threading.Event()
+ self.last_log_id = last_log_id
def run(self):
self.id = 0
- for f in self.filters:
- items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
- if items:
- if items[0]['id'] > self.id:
- self.id = items[0]['id']
+        if self.last_log_id is not None:
+ self.id = self.last_log_id
+ else:
+ for f in self.filters:
+ items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
+ if items:
+ if items[0]['id'] > self.id:
+ self.id = items[0]['id']
self.on_event({'status': 200})
while not self.stop.isSet():
max_id = self.id
+ moreitems = False
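+            # moreitems is set below when the server reports more matching
+            # log rows than it returned in one page; in that case, skip the
+            # poll_time sleep and immediately fetch the next batch.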
for f in self.filters:
- items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()['items']
- for i in items:
+ items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()
+ for i in items["items"]:
if i['id'] > max_id:
max_id = i['id']
self.on_event(i)
+ if items["items_available"] > len(items["items"]):
+ moreitems = True
self.id = max_id
- self.stop.wait(self.poll_time)
+ if not moreitems:
+ self.stop.wait(self.poll_time)
def run_forever(self):
# Have to poll here, otherwise KeyboardInterrupt will never get processed.
self.stop.wait(1)
def close(self):
+ """Close poll client and wait for it to finish."""
+
self.stop.set()
try:
self.join()
del self.filters[self.filters.index(filters)]
-def _subscribe_websocket(api, filters, on_event):
+def _subscribe_websocket(api, filters, on_event, last_log_id=None):
endpoint = api._rootDesc.get('websocketUrl', None)
if not endpoint:
raise errors.FeatureNotEnabledError(
"Server does not advertise a websocket endpoint")
- uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
- client = EventClient(uri_with_token, filters, on_event)
- ok = False
try:
- client.connect()
- ok = True
- return client
- finally:
- if not ok:
- client.close_connection()
-
-def subscribe(api, filters, on_event, poll_fallback=15):
+ uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
+ client = EventClient(uri_with_token, filters, on_event, last_log_id)
+ ok = False
+ try:
+ client.connect()
+ ok = True
+ return client
+ finally:
+ if not ok:
+ client.close_connection()
+ except:
+ _logger.warn("Failed to connect to websockets on %s" % endpoint)
+ raise
+
+
+def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
"""
:api:
a client object retrieved from arvados.api(). The caller should not use this client object for anything else after calling subscribe().
The callback when a message is received.
:poll_fallback:
If websockets are not available, fall back to polling every N seconds. If poll_fallback=False, this will return None if websockets are not available.
+ :last_log_id:
+      If given, start from log rows with id greater than this value,
+      instead of only events that arrive after subscribing.
"""
if not poll_fallback:
- return _subscribe_websocket(api, filters, on_event)
+ return _subscribe_websocket(api, filters, on_event, last_log_id)
try:
- return _subscribe_websocket(api, filters, on_event)
+ return _subscribe_websocket(api, filters, on_event, last_log_id)
except Exception as e:
_logger.warn("Falling back to polling after websocket error: %s" % e)
- p = PollClient(api, filters, on_event, poll_fallback)
+ p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
p.start()
return p
p = subprocess.Popen(execargs, **kwargs)
stdoutdata, stderrdata = p.communicate(None)
if p.returncode != 0:
- raise errors.CommandFailedError(
+ raise arvados.errors.CommandFailedError(
"run_command %s exit %d:\n%s" %
(execargs, p.returncode, stderrdata))
return stdoutdata, stderrdata
elif re.search('\.tar$', f.name()):
p = tar_extractor(path, '')
else:
- raise errors.AssertionError(
+ raise arvados.errors.AssertionError(
"tarball_extract cannot handle filename %s" % f.name())
while True:
buf = f.read(2**20)
p.wait()
if p.returncode != 0:
lockfile.close()
- raise errors.CommandFailedError(
+ raise arvados.errors.CommandFailedError(
"tar exited %d" % p.returncode)
os.symlink(tarball, os.path.join(path, '.locator'))
tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
for f in CollectionReader(zipball).all_files():
if not re.search('\.zip$', f.name()):
- raise errors.NotImplementedError(
+ raise arvados.errors.NotImplementedError(
"zipball_extract cannot handle filename %s" % f.name())
zip_filename = os.path.join(path, os.path.basename(f.name()))
zip_file = open(zip_filename, 'wb')
p.wait()
if p.returncode != 0:
lockfile.close()
- raise errors.CommandFailedError(
+ raise arvados.errors.CommandFailedError(
"unzip exited %d" % p.returncode)
os.unlink(zip_filename)
os.symlink(zipball, os.path.join(path, '.locator'))
outfile.write(buf)
outfile.close()
if len(files_got) < len(files):
- raise errors.AssertionError(
+ raise arvados.errors.AssertionError(
"Wanted files %s but only got %s from %s" %
(files, files_got,
[z.name() for z in CollectionReader(collection).all_files()]))
outfile.write(buf)
outfile.close()
if len(files_got) < len(files):
- raise errors.AssertionError(
+ raise arvados.errors.AssertionError(
"Wanted files %s but only got %s from %s" %
(files, files_got, [z.name() for z in stream.all_files()]))
lockfile.close()
"""
num_length_args = len(length_args)
if num_length_args > 2:
- raise errors.ArgumentError("is_hex accepts up to 3 arguments ({} given)"
- .format(1 + num_length_args))
+ raise arvados.errors.ArgumentError(
+ "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
elif num_length_args == 2:
good_len = (length_args[0] <= len(s) <= length_args[1])
elif num_length_args == 1:
help="""
Collection locator, optionally with a file path or prefix.
""")
-parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
+parser.add_argument('destination', type=str, nargs='?', default='-',
help="""
-Local file or directory where the data is to be written. Default:
-/dev/stdout.
+Local file or directory where the data is to be written. Default: stdout.
""")
group = parser.add_mutually_exclusive_group()
group.add_argument('--progress', action='store_true',
help="""
Overwrite existing files while writing. The default behavior is to
refuse to write *anything* if any of the output files already
-exist. As a special case, -f is not needed to write to /dev/stdout.
+exist. As a special case, -f is not needed to write to stdout.
""")
group.add_argument('--skip-existing', action='store_true',
help="""
logger.debug("Appended source file name to destination directory: %s",
args.destination)
-if args.destination == '-':
- args.destination = '/dev/stdout'
if args.destination == '/dev/stdout':
+ args.destination = "-"
+
+if args.destination == '-':
# Normally you have to use -f to write to a file (or device) that
# already exists, but "-" and "/dev/stdout" are common enough to
# merit a special exception.
# that isn't a tty.
if (not (args.batch_progress or args.no_progress)
and sys.stderr.isatty()
- and (args.destination != '/dev/stdout'
+ and (args.destination != '-'
or not sys.stdout.isatty())):
args.progress = True
if not args.f:
open_flags |= os.O_EXCL
try:
- out_fd = os.open(args.destination, open_flags)
- with os.fdopen(out_fd, 'wb') as out_file:
- out_file.write(reader.manifest_text())
+ if args.destination == "-":
+ sys.stdout.write(reader.manifest_text())
+ else:
+ out_fd = os.open(args.destination, open_flags)
+ with os.fdopen(out_fd, 'wb') as out_file:
+ out_file.write(reader.manifest_text())
except (IOError, OSError) as error:
abort("can't write to '{}': {}".format(args.destination, error))
except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
if 0 != string.find(os.path.join(s.name(), f.name()),
'.' + get_prefix):
continue
- dest_path = os.path.join(
- args.destination,
- os.path.join(s.name(), f.name())[len(get_prefix)+1:])
- if (not (args.n or args.f or args.skip_existing) and
- os.path.exists(dest_path)):
- abort('Local file %s already exists.' % (dest_path,))
+ if args.destination == "-":
+ dest_path = "-"
+ else:
+ dest_path = os.path.join(
+ args.destination,
+ os.path.join(s.name(), f.name())[len(get_prefix)+1:])
+ if (not (args.n or args.f or args.skip_existing) and
+ os.path.exists(dest_path)):
+ abort('Local file %s already exists.' % (dest_path,))
else:
if os.path.join(s.name(), f.name()) != '.' + get_prefix:
continue
outfile = None
digestor = None
if not args.n:
- if args.skip_existing and os.path.exists(outfilename):
- logger.debug('Local file %s exists. Skipping.', outfilename)
- continue
- elif not args.f and (os.path.isfile(outfilename) or
- os.path.isdir(outfilename)):
- # Good thing we looked again: apparently this file wasn't
- # here yet when we checked earlier.
- abort('Local file %s already exists.' % (outfilename,))
- if args.r:
- arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
- try:
- outfile = open(outfilename, 'wb')
- except Exception as error:
- abort('Open(%s) failed: %s' % (outfilename, error))
+ if outfilename == "-":
+ outfile = sys.stdout
+ else:
+ if args.skip_existing and os.path.exists(outfilename):
+ logger.debug('Local file %s exists. Skipping.', outfilename)
+ continue
+ elif not args.f and (os.path.isfile(outfilename) or
+ os.path.isdir(outfilename)):
+ # Good thing we looked again: apparently this file wasn't
+ # here yet when we checked earlier.
+ abort('Local file %s already exists.' % (outfilename,))
+ if args.r:
+ arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+ try:
+ outfile = open(outfilename, 'wb')
+ except Exception as error:
+ abort('Open(%s) failed: %s' % (outfilename, error))
if args.hash:
digestor = hashlib.new(args.hash)
try:
sys.stderr.write("%s %s/%s\n"
% (digestor.hexdigest(), s.name(), f.name()))
except KeyboardInterrupt:
- if outfile and outfilename != '/dev/stdout':
- os.unlink(outfilename)
+ if outfile and (outfile.fileno() > 2) and not outfile.closed:
+ os.unlink(outfile.name)
break
if args.progress:
sock.close()
return port
+def _wait_until_port_listens(port, timeout=10):
+ """Wait for a process to start listening on the given port.
+
+ If nothing listens on the port within the specified timeout (given
+ in seconds), print a warning on stderr before returning.
+ """
+ try:
+ subprocess.check_output(['which', 'lsof'])
+ except subprocess.CalledProcessError:
+        print("WARNING: No `lsof` -- cannot wait for port to listen. "+
+              "Sleeping 0.5 and hoping for the best.",
+              file=sys.stderr)
+ time.sleep(0.5)
+ return
+ deadline = time.time() + timeout
+ while time.time() < deadline:
+ try:
+ subprocess.check_output(
+ ['lsof', '-t', '-i', 'tcp:'+str(port)])
+ except subprocess.CalledProcessError:
+ time.sleep(0.1)
+ continue
+ return
+ print(
+ "WARNING: Nothing is listening on port {} (waited {} seconds).".
+ format(port, timeout),
+ file=sys.stderr)
+
def run(leave_running_atexit=False):
"""Ensure an API server is running, and ARVADOS_API_* env vars have
admin credentials for it.
my_api_host = match.group(1)
os.environ['ARVADOS_API_HOST'] = my_api_host
- # Make sure the server has written its pid file before continuing
+ # Make sure the server has written its pid file and started
+ # listening on its TCP port
find_server_pid(pid_file)
+ _wait_until_port_listens(port)
reset()
os.chdir(restore_cwd)
with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'w') as f:
f.write(keep0)
+ _wait_until_port_listens(port)
+
return port
def run_keep(blob_signing_key=None, enforce_permissions=False):
}}).execute()
os.environ["ARVADOS_KEEP_PROXY"] = "http://localhost:{}".format(port)
_setport('keepproxy', port)
+ _wait_until_port_listens(port)
def stop_keep_proxy():
if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
with open(_pidfile('arv-git-httpd'), 'w') as f:
f.write(str(agh.pid))
_setport('arv-git-httpd', gitport)
+ _wait_until_port_listens(gitport)
def stop_arv_git_httpd():
if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
--- /dev/null
+#!/usr/bin/env python
+
+import unittest
+import arvados.errors as arv_error
+import arvados.commands.ws as arv_ws
+
+class ArvWsTestCase(unittest.TestCase):
+ def run_ws(self, args):
+ return arv_ws.main(args)
+
+ def test_unsupported_arg(self):
+ with self.assertRaises(SystemExit):
+ self.run_ws(['-x=unknown'])
-import unittest
import os
-import arvados.util
+import subprocess
+import unittest
+
+import arvados
class MkdirDashPTest(unittest.TestCase):
def setUp(self):
with open('./tmp/bar', 'wb') as f:
f.write('bar')
self.assertRaises(OSError, arvados.util.mkdir_dash_p, './tmp/bar')
+
+
+class RunCommandTestCase(unittest.TestCase):
+ def test_success(self):
+ stdout, stderr = arvados.util.run_command(['echo', 'test'],
+ stderr=subprocess.PIPE)
+ self.assertEqual("test\n", stdout)
+ self.assertEqual("", stderr)
+
+ def test_failure(self):
+ with self.assertRaises(arvados.errors.CommandFailedError):
+ arvados.util.run_command(['false'])
import arvados.events
import mock
import threading
+from datetime import datetime, timedelta
+import time
class WebsocketTest(run_test_server.TestCaseWithServers):
MAIN_SERVER = {}
self.ws.close()
super(WebsocketTest, self).tearDown()
- def _test_subscribe(self, poll_fallback, expect_type):
+ def _test_subscribe(self, poll_fallback, expect_type, last_log_id=None, additional_filters=None, expected=1):
run_test_server.authorize_with('active')
- events = Queue.Queue(3)
+ events = Queue.Queue(100)
+
+ # Create ancestor before subscribing.
+        # When subscribing with a start_time in the past, this event should
+        # also be retrieved; when start_time is omitted, it should not.
+ ancestor = arvados.api('v1').humans().create(body={}).execute()
+ time.sleep(5)
+
+ filters = [['object_uuid', 'is_a', 'arvados#human']]
+ if additional_filters:
+ filters = filters + additional_filters
+
self.ws = arvados.events.subscribe(
- arvados.api('v1'), [['object_uuid', 'is_a', 'arvados#human']],
- events.put, poll_fallback=poll_fallback)
+ arvados.api('v1'), filters,
+ events.put, poll_fallback=poll_fallback, last_log_id=last_log_id)
self.assertIsInstance(self.ws, expect_type)
- self.assertEqual(200, events.get(True, 10)['status'])
+ self.assertEqual(200, events.get(True, 5)['status'])
human = arvados.api('v1').humans().create(body={}).execute()
- self.assertEqual(human['uuid'], events.get(True, 10)['object_uuid'])
- self.assertTrue(events.empty(), "got more events than expected")
+
+        if last_log_id is None or expected == 0:
+ self.assertEqual(human['uuid'], events.get(True, 5)['object_uuid'])
+ self.assertTrue(events.empty(), "got more events than expected")
+ else:
+ log_events = []
+ for i in range(0, 20):
+ try:
+ event = events.get(True, 5)
+ self.assertTrue(event['object_uuid'] is not None)
+ log_events.append(event['object_uuid'])
+                except Queue.Empty:
+                    break
+
+ self.assertTrue(len(log_events)>1)
+ self.assertTrue(human['uuid'] in log_events)
+ self.assertTrue(ancestor['uuid'] in log_events)
def test_subscribe_websocket(self):
self._test_subscribe(
event_client_constr.side_effect = Exception('All is well')
self._test_subscribe(
poll_fallback=1, expect_type=arvados.events.PollClient)
+
+ def test_subscribe_websocket_with_start_time_date_only(self):
+ lastHour = datetime.today() - timedelta(hours = 1)
+ self._test_subscribe(
+ poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d')]])
+
+ @mock.patch('arvados.events.EventClient.__init__')
+ def test_poll_with_start_time_date_only(self, event_client_constr):
+ event_client_constr.side_effect = Exception('All is well')
+ lastHour = datetime.today() - timedelta(hours = 1)
+ self._test_subscribe(
+ poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d')]])
+
+ def test_subscribe_websocket_with_start_time_last_hour(self):
+ lastHour = datetime.today() - timedelta(hours = 1)
+ self._test_subscribe(
+ poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d %H:%M:%S')]])
+
+ @mock.patch('arvados.events.EventClient.__init__')
+ def test_subscribe_poll_with_start_time_last_hour(self, event_client_constr):
+ event_client_constr.side_effect = Exception('All is well')
+ lastHour = datetime.today() - timedelta(hours = 1)
+ self._test_subscribe(
+ poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d %H:%M:%S')]])
+
+ def test_subscribe_websocket_with_start_time_next_hour(self):
+ nextHour = datetime.today() + timedelta(hours = 1)
+ with self.assertRaises(Queue.Empty):
+ self._test_subscribe(
+ poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', nextHour.strftime('%Y-%m-%d %H:%M:%S')]], expected=0)
+
+ @mock.patch('arvados.events.EventClient.__init__')
+ def test_subscribe_poll_with_start_time_next_hour(self, event_client_constr):
+ event_client_constr.side_effect = Exception('All is well')
+ nextHour = datetime.today() + timedelta(hours = 1)
+ with self.assertRaises(Queue.Empty):
+ self._test_subscribe(
+ poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', nextHour.strftime('%Y-%m-%d %H:%M:%S')]], expected=0)
+
+ def test_subscribe_websocket_with_start_time_tomorrow(self):
+ tomorrow = datetime.today() + timedelta(hours = 24)
+ with self.assertRaises(Queue.Empty):
+ self._test_subscribe(
+ poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', tomorrow.strftime('%Y-%m-%d')]], expected=0)
+
+ @mock.patch('arvados.events.EventClient.__init__')
+ def test_subscribe_poll_with_start_time_tomorrow(self, event_client_constr):
+ event_client_constr.side_effect = Exception('All is well')
+ tomorrow = datetime.today() + timedelta(hours = 24)
+ with self.assertRaises(Queue.Empty):
+ self._test_subscribe(
+ poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+ additional_filters=[['created_at', '>=', tomorrow.strftime('%Y-%m-%d')]], expected=0)
skip_before_filter :find_object_by_uuid, :only => :get_all_permissions
skip_before_filter :render_404_if_no_object, :only => :get_all_permissions
before_filter :admin_required, :only => :get_all_permissions
+
def get_all_permissions
- @users = {}
- User.includes(:authorized_keys).find_each do |u|
- @users[u.uuid] = u
+ # users is a map of {user_uuid => User object}
+ users = {}
+ # user_aks is a map of {user_uuid => array of public keys}
+ user_aks = {}
+ # admins is an array of user_uuids
+ admins = []
+ User.eager_load(:authorized_keys).find_each do |u|
+ next unless u.is_active or u.uuid == anonymous_user_uuid
+ users[u.uuid] = u
+ user_aks[u.uuid] = u.authorized_keys.collect do |ak|
+ {
+ public_key: ak.public_key,
+ authorized_key_uuid: ak.uuid
+ }
+ end
+ admins << u.uuid if u.is_admin
end
- admins = @users.select { |k,v| v.is_admin }
- @user_aks = {}
@repo_info = {}
- Repository.includes(:permissions).find_each do |repo|
+ Repository.eager_load(:permissions).find_each do |repo|
@repo_info[repo.uuid] = {
uuid: repo.uuid,
name: repo.name,
fetch_url: repo.fetch_url,
user_permissions: {},
}
- gitolite_permissions = ''
- perms = []
+ # evidence is an array of {name: 'can_xxx', user_uuid: 'x-y-z'},
+ # one entry for each piece of evidence we find in the permission
+ # database that establishes that a user can access this
+ # repository. Multiple entries can be added for a given user,
+ # possibly with different access levels; these will be compacted
+ # below.
+ evidence = []
repo.permissions.each do |perm|
if ArvadosModel::resource_class_for_uuid(perm.tail_uuid) == Group
- @users.each do |user_uuid, user|
- user.group_permissions.each do |group_uuid, perm_mask|
- if perm_mask[:manage]
- perms << {name: 'can_manage', user_uuid: user_uuid}
- elsif perm_mask[:write]
- perms << {name: 'can_write', user_uuid: user_uuid}
- elsif perm_mask[:read]
- perms << {name: 'can_read', user_uuid: user_uuid}
- end
+ # A group has permission. Each user who has access to this
+ # group also has access to the repository. Access level is
+ # min(group-to-repo permission, user-to-group permission).
+ users.each do |user_uuid, user|
+ perm_mask = user.group_permissions[perm.tail_uuid]
+ if not perm_mask
+ next
+ elsif perm_mask[:manage] and perm.name == 'can_manage'
+ evidence << {name: 'can_manage', user_uuid: user_uuid}
+ elsif perm_mask[:write] and ['can_manage', 'can_write'].index perm.name
+ evidence << {name: 'can_write', user_uuid: user_uuid}
+ elsif perm_mask[:read]
+ evidence << {name: 'can_read', user_uuid: user_uuid}
end
end
- else
- perms << {name: perm.name, user_uuid: perm.tail_uuid}
+ elsif users[perm.tail_uuid]
+ # A user has permission; the user exists; and either the
+ # user is active, or it's the special case of the anonymous
+ # user which is never "active" but is allowed to read
+ # content from public repositories.
+ evidence << {name: perm.name, user_uuid: perm.tail_uuid}
end
end
- # Owner of the repository, and all admins, can RW
- ([repo.owner_uuid] + admins.keys).each do |user_uuid|
- perms << {name: 'can_write', user_uuid: user_uuid}
+ # Owner of the repository, and all admins, can do everything.
+ ([repo.owner_uuid] | admins).each do |user_uuid|
+ # Except: no permissions for inactive users, even if they own
+ # repositories.
+ next unless users[user_uuid]
+ evidence << {name: 'can_manage', user_uuid: user_uuid}
end
- perms.each do |perm|
+ # Distill all the evidence about permissions on this repository
+ # into one hash per user, of the form {'can_xxx' => true, ...}.
+ # The hash is nil for a user who has no permissions at all on
+ # this particular repository.
+ evidence.each do |perm|
user_uuid = perm[:user_uuid]
- @user_aks[user_uuid] = @users[user_uuid].andand.authorized_keys.andand.
- collect do |ak|
- {
- public_key: ak.public_key,
- authorized_key_uuid: ak.uuid
- }
- end || []
- if @user_aks[user_uuid].any?
- ri = (@repo_info[repo.uuid][:user_permissions][user_uuid] ||= {})
- ri[perm[:name]] = true
- end
+ user_perms = (@repo_info[repo.uuid][:user_permissions][user_uuid] ||= {})
+ user_perms[perm[:name]] = true
end
end
- @repo_info.values.each do |repo_users|
- repo_users[:user_permissions].each do |user_uuid,perms|
- if perms['can_manage']
- perms[:gitolite_permissions] = 'RW'
- perms['can_write'] = true
- perms['can_read'] = true
- elsif perms['can_write']
- perms[:gitolite_permissions] = 'RW'
- perms['can_read'] = true
- elsif perms['can_read']
- perms[:gitolite_permissions] = 'R'
+ # Revisit each {'can_xxx' => true, ...} hash for some final
+ # cleanup to make life easier for the requestor.
+ #
+ # Add a 'gitolite_permissions' key alongside the 'can_xxx' keys,
+ # for the convenience of the gitolite config file generator.
+ #
+ # Add all lesser permissions when a greater permission is
+ # present. If the requestor only wants to know who can write, it
+ # only has to test for 'can_write' in the response.
+ @repo_info.values.each do |repo|
+ repo[:user_permissions].each do |user_uuid, user_perms|
+ if user_perms['can_manage']
+ user_perms['gitolite_permissions'] = 'RW'
+ user_perms['can_write'] = true
+ user_perms['can_read'] = true
+ elsif user_perms['can_write']
+ user_perms['gitolite_permissions'] = 'RW'
+ user_perms['can_read'] = true
+ elsif user_perms['can_read']
+ user_perms['gitolite_permissions'] = 'R'
end
end
end
+ # The response looks like
+ # {"kind":"...",
+ # "repositories":[r1,r2,r3,...],
+ # "user_keys":usermap}
+ # where each of r1,r2,r3 looks like
+ # {"uuid":"repo-uuid-1",
+ # "name":"username/reponame",
+ # "push_url":"...",
+ # "user_permissions":{"user-uuid-a":{"can_read":true,"gitolite_permissions":"R"}}}
+ # and usermap looks like
+ # {"user-uuid-a":[{"public_key":"ssh-rsa g...","authorized_key_uuid":"ak-uuid-g"},...],
+ # "user-uuid-b":[{"public_key":"ssh-rsa h...","authorized_key_uuid":"ak-uuid-h"},...],...}
send_json(kind: 'arvados#RepositoryPermissionSnapshot',
repositories: @repo_info.values,
- user_keys: @user_aks)
+ user_keys: user_aks)
end
end
end
def get_all_logins
- @users = {}
- User.includes(:authorized_keys).all.each do |u|
- @users[u.uuid] = u
- end
@response = []
- @vms = VirtualMachine.includes(:login_permissions)
+ @vms = VirtualMachine.eager_load :login_permissions
if @object
- @vms = @vms.where('uuid=?', @object.uuid)
+ @vms = @vms.where uuid: @object.uuid
else
@vms = @vms.all
end
+ @users = {}
+ User.eager_load(:authorized_keys).
+ where('users.uuid in (?)',
+ @vms.map { |vm| vm.login_permissions.map &:tail_uuid }.flatten.uniq).
+ each do |u|
+ @users[u.uuid] = u
+ end
@vms.each do |vm|
vm.login_permissions.each do |perm|
user_uuid = perm.tail_uuid
- @users[user_uuid].andand.authorized_keys.andand.each do |ak|
- unless perm.properties['username'].blank?
- @response << {
- username: perm.properties['username'],
- hostname: vm.hostname,
- groups: (perm.properties["groups"].to_a rescue []),
- public_key: ak.public_key,
- user_uuid: user_uuid,
- virtual_machine_uuid: vm.uuid,
- authorized_key_uuid: ak.uuid
- }
- end
+ next if not @users[user_uuid]
+ next if perm.properties['username'].blank?
+ aks = @users[user_uuid].authorized_keys
+ if aks.empty?
+ # We'll emit one entry, with no public key.
+ aks = [nil]
+ end
+ aks.each do |ak|
+ @response << {
+ username: perm.properties['username'],
+ hostname: vm.hostname,
+ groups: (perm.properties['groups'].to_a rescue []),
+ public_key: ak ? ak.public_key : nil,
+ user_uuid: user_uuid,
+ virtual_machine_uuid: vm.uuid,
+ authorized_key_uuid: ak ? ak.uuid : nil,
+ }
end
end
end
after_destroy :log_destroy
after_find :convert_serialized_symbols_to_strings
before_validation :normalize_collection_uuids
+ before_validation :set_default_owner
validate :ensure_serialized_attribute_type
validate :ensure_valid_uuids
true
end
- def ensure_owner_uuid_is_permitted
- raise PermissionDeniedError if !current_user
-
- if new_record? and respond_to? :owner_uuid=
+ def set_default_owner
+ if new_record? and current_user and respond_to? :owner_uuid=
self.owner_uuid ||= current_user.uuid
end
+ end
+
+ def ensure_owner_uuid_is_permitted
+ raise PermissionDeniedError if !current_user
if self.owner_uuid.nil?
errors.add :owner_uuid, "cannot be nil"
def public_key_must_be_unique
if self.public_key
- #key = /^ssh-(rsa|dss) [A-Za-z0-9+\/=\+]+\b/.match(self.public_key)
valid_key = SSHKey.valid_ssh_public_key? self.public_key
if not valid_key
errors.add(:public_key, "does not appear to be a valid ssh-rsa or dsa public key")
else
# Valid if no other rows have this public key
- if self.class.where('public_key like ?', "%#{self.public_key}%").any?
+ if self.class.where('uuid != ? and public_key like ?',
+ uuid || '', "%#{self.public_key}%").any?
errors.add(:public_key, "already exists in the database, use a different key.")
return false
end
# Get the commit hash for the upper bound
max_hash = nil
- IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
+ git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --"
+ IO.foreach("|#{git_max_hash_cmd}") do |line|
max_hash = line.strip
end
- # If not found or string is invalid, nothing else to do
- return [] if !max_hash or !git_check_ref_format(max_hash)
+ # If not found, nothing else to do
+ if !max_hash
+ logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output"
+ return []
+ end
+
+ # If string is invalid, nothing else to do
+ if !git_check_ref_format(max_hash)
+ logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}"
+ return []
+ end
resolved_exclude = nil
if exclude
if minimum
# Get the commit hash for the lower bound
min_hash = nil
- IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
+ git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --"
+ IO.foreach("|#{git_min_hash_cmd}") do |line|
min_hash = line.strip
end
- # If not found or string is invalid, nothing else to do
- return [] if !min_hash or !git_check_ref_format(min_hash)
+ # If not found, nothing else to do
+ if !min_hash
+ logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output"
+ return []
+ end
+
+ # If string is invalid, nothing else to do
+ if !git_check_ref_format(min_hash)
+ logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}"
+ return []
+ end
# Now find all commits between them
IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
# Do not use this file for site configuration. Create application.yml
# instead (see application.yml.example).
-
-development:
- force_ssl: false
- cache_classes: false
- whiny_nils: true
- consider_all_requests_local: true
- action_controller.perform_caching: false
- action_mailer.raise_delivery_errors: false
- action_mailer.perform_deliveries: false
- active_support.deprecation: :log
- action_dispatch.best_standards_support: :builtin
- active_record.mass_assignment_sanitizer: :strict
- active_record.auto_explain_threshold_in_seconds: 0.5
- assets.compress: false
- assets.debug: true
- local_modified: "<%= '-modified' if `git status -s` != '' %>"
-
-production:
- force_ssl: true
- cache_classes: true
- consider_all_requests_local: false
- action_controller.perform_caching: true
- serve_static_assets: false
- assets.compress: true
- assets.compile: false
- assets.digest: true
-
-test:
- force_ssl: false
- cache_classes: true
- serve_static_assets: true
- static_cache_control: public, max-age=3600
- whiny_nils: true
- consider_all_requests_local: true
- action_controller.perform_caching: false
- action_dispatch.show_exceptions: false
- action_controller.allow_forgery_protection: false
- action_mailer.delivery_method: :test
- active_support.deprecation: :stderr
- active_record.mass_assignment_sanitizer: :strict
- uuid_prefix: zzzzz
- sso_app_id: arvados-server
- sso_app_secret: <%= rand(2**512).to_s(36) %>
- sso_provider_url: http://localhost:3002
- secret_token: <%= rand(2**512).to_s(36) %>
- blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
- user_profile_notification_address: arvados@example.com
- workbench_address: https://localhost:3001/
- git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
- git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
+#
+# The order of precedence is:
+# 1. config/environments/{RAILS_ENV}.rb (deprecated)
+# 2. Section in application.yml corresponding to RAILS_ENV (e.g., development)
+# 3. Section in application.yml called "common"
+# 4. Section in application.default.yml corresponding to RAILS_ENV
+# 5. Section in application.default.yml called "common"
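+#
+# For example (hypothetical values), a minimal config/application.yml
+# might contain only:
+#
+#   production:
+#     uuid_prefix: abcde
+#     secret_token: (at least 50 random alphanumeric characters)
+#
+# and every other setting would fall back to the matching section here.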
common:
+ ###
+ ### Essential site configuration
+ ###
+
# The prefix used for all database identifiers to identify the record as
# originating from this site. Must be exactly 5 alphanumeric characters
# (lowercase ASCII letters and digits).
- uuid_prefix: <%= Digest::MD5.hexdigest(`hostname`).to_i(16).to_s(36)[0..4] %>
+ uuid_prefix: ~
- # If not false, this is the hostname that will be used for root_url and
- # advertised in the discovery document. By default, use the default Rails
- # logic for deciding on a hostname.
- host: false
+ # secret_token is a string of alphanumeric characters used by Rails
+ # to sign session tokens. IMPORTANT: This is a site secret. It
+ # should be at least 50 characters.
+ secret_token: ~
- # Base part of SSH git clone url given with repository resources. If
- # true, the default "git@git.(uuid_prefix).arvadosapi.com:" is
- # used. If false, SSH clone URLs are not advertised. Include a
- # trailing ":" or "/" if needed: it will not be added automatically.
- git_repo_ssh_base: true
+ # blob_signing_key is a string of alphanumeric characters used to
+ # generate permission signatures for Keep locators. It must be
+ # identical to the permission key given to Keep. IMPORTANT: This is
+ # a site secret. It should be at least 50 characters.
+ blob_signing_key: ~
- # Base part of HTTPS git clone urls given with repository
- # resources. This is expected to be an arv-git-httpd service which
- # accepts API tokens as HTTP-auth passwords. If true, the default
- # "https://git.(uuid_prefix).arvadosapi.com/" is used. If false,
- # HTTPS clone URLs are not advertised. Include a trailing ":" or "/"
- # if needed: it will not be added automatically.
- git_repo_https_base: true
+ # These settings are provided by your OAuth2 provider (e.g.,
+ # sso-provider).
+ sso_app_secret: ~
+ sso_app_id: ~
+ sso_provider_url: ~
# If this is not false, HTML requests at the API server's root URL
# are redirected to this location, and it is provided in the text of
# to log in.
workbench_address: false
+ # The ARVADOS_WEBSOCKETS environment variable determines whether to
+ # serve http, websockets, or both.
+ #
+ # If ARVADOS_WEBSOCKETS="true", http and websockets are both served
+ # from the same process.
+ #
+ # If ARVADOS_WEBSOCKETS="ws-only", only websockets is served.
+ #
+ # If ARVADOS_WEBSOCKETS="false" or not set at all, only http is
+ # served. In this case, you should have a separate process serving
+ # websockets, and the address of that service should be given here
+ # as websocket_address.
+ #
+ # If websocket_address is false (which is the default), the
+ # discovery document will tell clients to use the current server as
+ # the websocket service, or (if the current server does not have
+ # websockets enabled) not to use websockets at all.
+ #
+ # Example: Clients will connect to the specified endpoint.
+ #websocket_address: wss://127.0.0.1:3333/websocket
+ # Default: Clients will connect to this server if it's running
+ # websockets, otherwise none at all.
+ websocket_address: false
+
# Git repositories must be readable by api server, or you won't be
# able to submit crunch jobs. To pass the test suites, put a clone
# of the arvados tree in {git_repositories_dir}/arvados.git or
# subdirectory of {git_repositiories_dir}.
git_internal_dir: /var/lib/arvados/internal.git
- # :none or :slurm_immediate
- crunch_job_wrapper: :none
-
- # username, or false = do not set uid when running jobs.
- crunch_job_user: crunch
-
- # The web service must be able to create/write this file, and
- # crunch-job must be able to stat() it.
- crunch_refresh_trigger: /tmp/crunch_refresh_trigger
-
- # These two settings control how frequently log events are flushed to the
- # database. Log lines are buffered until either crunch_log_bytes_per_event
- # has been reached or crunch_log_seconds_between_events has elapsed since
- # the last flush.
- crunch_log_bytes_per_event: 4096
- crunch_log_seconds_between_events: 1
-
- # The sample period for throttling logs, in seconds.
- crunch_log_throttle_period: 60
-
- # Maximum number of bytes that job can log over crunch_log_throttle_period
- # before being silenced until the end of the period.
- crunch_log_throttle_bytes: 65536
-
- # Maximum number of lines that job can log over crunch_log_throttle_period
- # before being silenced until the end of the period.
- crunch_log_throttle_lines: 1024
-
- # Maximum bytes that may be logged by a single job. Log bytes that are
- # silenced by throttling are not counted against this total.
- crunch_limit_log_bytes_per_job: 67108864
+ # Default replication level for collections. This is used when a
+ # collection's replication_desired attribute is nil.
+ default_collection_replication: 2
- # Path to dns server configuration directory
- # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
- # files or touch restart.txt (see below).
- dns_server_conf_dir: false
- # Template file for the dns server host snippets. See
- # unbound.template in this directory for an example. If false, do
- # not write any config files.
- dns_server_conf_template: false
+ ###
+ ### Overriding default advertised hostnames/URLs
+ ###
- # String to write to {dns_server_conf_dir}/restart.txt (with a
- # trailing newline) after updating local data. If false, do not
- # open or write the restart.txt file.
- dns_server_reload_command: false
+ # If not false, this is the hostname that will be used for root_url and
+ # advertised in the discovery document. By default, use the default Rails
+ # logic for deciding on a hostname.
+ host: false
- # Command to run after each DNS update. Template variables will be
- # substituted; see the "unbound" example below. If false, do not run
- # a command.
- dns_server_update_command: false
+ # Base part of SSH git clone url given with repository resources. If
+ # true, the default "git@git.(uuid_prefix).arvadosapi.com:" is
+ # used. If false, SSH clone URLs are not advertised. Include a
+ # trailing ":" or "/" if needed: it will not be added automatically.
+ git_repo_ssh_base: true
- ## Example for unbound:
- #dns_server_conf_dir: /etc/unbound/conf.d
- #dns_server_conf_template: /path/to/your/api/server/config/unbound.template
- ## ...plus one of the following two methods of reloading:
- #dns_server_reload_command: unbound-control reload
- #dns_server_update_command: echo %{hostname} %{hostname}.%{uuid_prefix} %{hostname}.%{uuid_prefix}.arvadosapi.com %{ptr_domain} | xargs -n 1 unbound-control local_data_remove && unbound-control local_data %{hostname} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix}.arvadosapi.com IN A %{ip_address} && unbound-control local_data %{ptr_domain}. IN PTR %{hostname}.%{uuid_prefix}.arvadosapi.com
+ # Base part of HTTPS git clone urls given with repository
+ # resources. This is expected to be an arv-git-httpd service which
+ # accepts API tokens as HTTP-auth passwords. If true, the default
+ # "https://git.(uuid_prefix).arvadosapi.com/" is used. If false,
+ # HTTPS clone URLs are not advertised. Include a trailing ":" or "/"
+ # if needed: it will not be added automatically.
+ git_repo_https_base: true
- compute_node_domain: false
- compute_node_nameservers:
- - 192.168.1.1
- # The version below is suitable for AWS.
- # To use it, copy it to your application.yml, uncomment, and change <%# to <%=
- # compute_node_nameservers: <%#
- # require 'net/http'
- # ['local', 'public'].collect do |iface|
- # Net::HTTP.get(URI("http://169.254.169.254/latest/meta-data/#{iface}-ipv4")).match(/^[\d\.]+$/)[0]
- # end << '172.16.0.23'
- # %>
+ ###
+    ### New user and email settings
+ ###
- accept_api_token: {}
+ # Config parameters to automatically setup new users.
+ # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
+ # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+ auto_setup_new_users: false
+ auto_setup_new_users_with_vm_uuid: false
+ auto_setup_new_users_with_repository: false
+ auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
# When new_users_are_active is set to true, the user agreement check is skipped.
new_users_are_active: false
- admin_notifier_email_from: arvados@example.com
- email_subject_prefix: "[ARVADOS] "
- user_notifier_email_from: arvados@example.com
- new_user_notification_recipients: [ ]
- new_inactive_user_notification_recipients: [ ]
-
# The e-mail address of the user you would like to become marked as an admin
# user on their first login.
# In the default configuration, authentication happens through the Arvados SSO
- # server, which uses openid against Google's servers, so in that case this
+ # server, which uses OAuth2 against Google's servers, so in that case this
# should be an address associated with a Google account.
auto_admin_user: false
# other admin users exist will automatically become an admin user.
auto_admin_first_user: false
- ## Set Time.zone default to the specified zone and make Active
- ## Record auto-convert to this zone. Run "rake -D time" for a list
- ## of tasks for finding time zone names. Default is UTC.
- #time_zone: Central Time (US & Canada)
-
- ## Default encoding used in templates for Ruby 1.9.
- encoding: utf-8
-
- # Enable the asset pipeline
- assets.enabled: true
-
- # Version of your assets, change this if you want to expire all your assets
- assets.version: "1.0"
+ # Email address to notify whenever a user creates a profile for the
+ # first time
+ user_profile_notification_address: false
- arvados_theme: default
+ admin_notifier_email_from: arvados@example.com
+ email_subject_prefix: "[ARVADOS] "
+ user_notifier_email_from: arvados@example.com
+ new_user_notification_recipients: [ ]
+ new_inactive_user_notification_recipients: [ ]
- # The ARVADOS_WEBSOCKETS environment variable determines whether to
- # serve http, websockets, or both.
- #
- # If ARVADOS_WEBSOCKETS="true", http and websockets are both served
- # from the same process.
- #
- # If ARVADOS_WEBSOCKETS="ws-only", only websockets is served.
- #
- # If ARVADOS_WEBSOCKETS="false" or not set at all, only http is
- # served. In this case, you should have a separate process serving
- # websockets, and the address of that service should be given here
- # as websocket_address.
- #
- # If websocket_address is false (which is the default), the
- # discovery document will tell clients to use the current server as
- # the websocket service, or (if the current server does not have
- # websockets enabled) not to use websockets at all.
- #
- # Example: Clients will connect to the specified endpoint.
- #websocket_address: wss://127.0.0.1:3333/websocket
- # Default: Clients will connect to this server if it's running
- # websockets, otherwise none at all.
- websocket_address: false
- # blob_signing_key is a string of alphanumeric characters used to
- # generate permission signatures for Keep locators. It must be
- # identical to the permission key given to Keep. IMPORTANT: This is
- # a site secret. It should be at least 50 characters.
- blob_signing_key: ~
+ ###
+ ### Limits, timeouts and durations
+ ###
# Lifetime (in seconds) of blob permission signatures generated by
# the API server. This determines how long a client can take (after
# The default is 2 weeks.
blob_signature_ttl: 1209600
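+ # (1209600 = 14 days * 24 hours * 3600 seconds.)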
- # Allow clients to create collections by providing a manifest with
- # unsigned data blob locators. IMPORTANT: This effectively disables
- # access controls for data stored in Keep: a client who knows a hash
- # can write a manifest that references the hash, pass it to
- # collections.create (which will create a permission link), use
- # collections.get to obtain a signature for that data locator, and
- # use that signed locator to retrieve the data from Keep. Therefore,
- # do not turn this on if your users expect to keep data private from
- # one another!
- permit_create_collection_with_unsigned_manifest: false
-
- # secret_token is a string of alphanumeric characters used by Rails
- # to sign session tokens. IMPORTANT: This is a site secret. It
- # should be at least 50 characters.
- secret_token: ~
-
- # Email address to notify whenever a user creates a profile for the
- # first time
- user_profile_notification_address: false
-
- default_openid_prefix: https://www.google.com/accounts/o8/id
-
- # Config parameters to automatically setup new users.
- # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
- # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
- auto_setup_new_users: false
- auto_setup_new_users_with_vm_uuid: false
- auto_setup_new_users_with_repository: false
- auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
-
- # source_version
- source_version: "<%= `git log -n 1 --format=%h`.strip %>"
- local_modified: false
-
# Default lifetime for ephemeral collections: 2 weeks.
default_trash_lifetime: 1209600
- # Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
- # On (sso) server. Should only be enabled during development when the SSO
- # server is using a self-signed cert.
- sso_insecure: false
-
- # These settings are provided by your OAuth2 provider (e.g.,
- # sso-provider).
- sso_app_id: ~
- sso_app_secret: ~
- sso_provider_url: ~
-
- # Default replication level for collections. This is used when a
- # collection's replication_desired attribute is nil.
- default_collection_replication: 2
-
# Maximum size (in bytes) allowed for a single API request. This
# limit is published in the discovery document for use by clients.
# Note: You must separately configure the upstream web server or
# go down.
max_compute_nodes: 64
+ # These two settings control how frequently log events are flushed to the
+ # database. Log lines are buffered until either crunch_log_bytes_per_event
+ # has been reached or crunch_log_seconds_between_events has elapsed since
+ # the last flush.
+ crunch_log_bytes_per_event: 4096
+ crunch_log_seconds_between_events: 1
+
+ # The sample period for throttling logs, in seconds.
+ crunch_log_throttle_period: 60
+
+ # Maximum number of bytes that a job can log over crunch_log_throttle_period
+ # before being silenced until the end of the period.
+ crunch_log_throttle_bytes: 65536
+
+ # Maximum number of lines that a job can log over crunch_log_throttle_period
+ # before being silenced until the end of the period.
+ crunch_log_throttle_lines: 1024
+
+ # Maximum bytes that may be logged by a single job. Log bytes that are
+ # silenced by throttling are not counted against this total.
+ crunch_limit_log_bytes_per_job: 67108864
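+ # (67108864 bytes = 64 MiB.) For example, with the defaults above, a
+ # job writing 1 MiB of log output in a single minute is silenced after
+ # the first 64 KiB (or 1024 lines, whichever comes first) until the
+ # 60-second throttle period ends.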
+
+
+ ###
+ ### Crunch, DNS & compute node management
+ ###
+
# Docker image to be used when none found in runtime_constraints of a job
default_docker_image_for_jobs: false
+ # :none or :slurm_immediate
+ crunch_job_wrapper: :none
+
+ # Username to run jobs as, or false = do not set uid when running jobs.
+ crunch_job_user: crunch
+
+ # The web service must be able to create/write this file, and
+ # crunch-job must be able to stat() it.
+ crunch_refresh_trigger: /tmp/crunch_refresh_trigger
+
+ # Path to dns server configuration directory
+ # (e.g. /etc/unbound/conf.d). If false, do not write any config
+ # files or touch restart.txt (see below).
+ dns_server_conf_dir: false
+
+ # Template file for the dns server host snippets. See
+ # unbound.template in this directory for an example. If false, do
+ # not write any config files.
+ dns_server_conf_template: false
+
+ # String to write to {dns_server_conf_dir}/restart.txt (with a
+ # trailing newline) after updating local data. If false, do not
+ # open or write the restart.txt file.
+ dns_server_reload_command: false
+
+ # Command to run after each DNS update. Template variables will be
+ # substituted; see the "unbound" example below. If false, do not run
+ # a command.
+ dns_server_update_command: false
+
+ ## Example for unbound:
+ #dns_server_conf_dir: /etc/unbound/conf.d
+ #dns_server_conf_template: /path/to/your/api/server/config/unbound.template
+ ## ...plus one of the following two methods of reloading:
+ #dns_server_reload_command: unbound-control reload
+ #dns_server_update_command: echo %{hostname} %{hostname}.%{uuid_prefix} %{hostname}.%{uuid_prefix}.arvadosapi.com %{ptr_domain} | xargs -n 1 unbound-control local_data_remove && unbound-control local_data %{hostname} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix}.arvadosapi.com IN A %{ip_address} && unbound-control local_data %{ptr_domain}. IN PTR %{hostname}.%{uuid_prefix}.arvadosapi.com
+
+ compute_node_domain: false
+ compute_node_nameservers:
+ - 192.168.1.1
+
# Hostname to assign to a compute node when it sends a "ping" and the
# hostname in its Node record is nil.
# During bootstrapping, the "ping" script is expected to notice the
# assign_node_hostname: compute%<slot_number>04d
# (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
assign_node_hostname: compute%<slot_number>d
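+ # For example, compute%<slot_number>04d renders slot number 9 as
+ # "compute0009" (zero-padded to four digits).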
+
+
+ ###
+ ### Remaining assorted configuration options
+ ###
+
+ arvados_theme: default
+
+ # Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
+ # On (sso) server. Should only be enabled during development when the SSO
+ # server is using a self-signed cert.
+ sso_insecure: false
+
+ ## Set Time.zone default to the specified zone and make Active
+ ## Record auto-convert to this zone. Run "rake -D time" for a list
+ ## of tasks for finding time zone names. Default is UTC.
+ #time_zone: Central Time (US & Canada)
+
+ ## Default encoding used in templates for Ruby 1.9.
+ encoding: utf-8
+
+ # Enable the asset pipeline
+ assets.enabled: true
+
+ # Version of your assets, change this if you want to expire all your assets
+ assets.version: "1.0"
+
+ # Allow clients to create collections by providing a manifest with
+ # unsigned data blob locators. IMPORTANT: This effectively disables
+ # access controls for data stored in Keep: a client who knows a hash
+ # can write a manifest that references the hash, pass it to
+ # collections.create (which will create a permission link), use
+ # collections.get to obtain a signature for that data locator, and
+ # use that signed locator to retrieve the data from Keep. Therefore,
+ # do not turn this on if your users expect to keep data private from
+ # one another!
+ permit_create_collection_with_unsigned_manifest: false
+
+ default_openid_prefix: https://www.google.com/accounts/o8/id
+
+ # source_version
+ source_version: "<%= `git log -n 1 --format=%h`.strip %>"
+ local_modified: false
+
+
+development:
+ force_ssl: false
+ cache_classes: false
+ whiny_nils: true
+ consider_all_requests_local: true
+ action_controller.perform_caching: false
+ action_mailer.raise_delivery_errors: false
+ action_mailer.perform_deliveries: false
+ active_support.deprecation: :log
+ action_dispatch.best_standards_support: :builtin
+ active_record.mass_assignment_sanitizer: :strict
+ active_record.auto_explain_threshold_in_seconds: 0.5
+ assets.compress: false
+ assets.debug: true
+ local_modified: "<%= '-modified' if `git status -s` != '' %>"
+
+production:
+ force_ssl: true
+ cache_classes: true
+ consider_all_requests_local: false
+ action_controller.perform_caching: true
+ serve_static_assets: false
+ assets.compress: true
+ assets.compile: false
+ assets.digest: true
+
+test:
+ force_ssl: false
+ cache_classes: true
+ serve_static_assets: true
+ static_cache_control: public, max-age=3600
+ whiny_nils: true
+ consider_all_requests_local: true
+ action_controller.perform_caching: false
+ action_dispatch.show_exceptions: false
+ action_controller.allow_forgery_protection: false
+ action_mailer.delivery_method: :test
+ active_support.deprecation: :stderr
+ active_record.mass_assignment_sanitizer: :strict
+ uuid_prefix: zzzzz
+ sso_app_id: arvados-server
+ sso_app_secret: <%= rand(2**512).to_s(36) %>
+ sso_provider_url: http://localhost:3002
+ secret_token: <%= rand(2**512).to_s(36) %>
+ blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
+ user_profile_notification_address: arvados@example.com
+ workbench_address: https://localhost:3001/
+ git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
+ git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
# 4. Section in application.default.yml corresponding to RAILS_ENV
# 5. Section in application.default.yml called "common"
-development:
- # Mandatory site secrets. See application.default.yml for more info.
+production:
+ # Mandatory site configuration. See application.default.yml and
+ # http://doc.arvados.org/install/install-api-server.html#configure_application
+ # for more information.
+ uuid_prefix: ~
secret_token: ~
blob_signing_key: ~
- uuid_prefix: bogus
- workbench_address: https://localhost:3031
+ sso_app_secret: ~
+ sso_app_id: ~
+ sso_provider_url: ~
+ workbench_address: ~
+ websockets_address: ~
+ #git_repositories_dir: ~
+ #git_internal_dir: ~
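+
+ # One way to generate suitable random values for secret_token and
+ # blob_signing_key (the same expression the test section of
+ # application.default.yml uses):
+ # ruby -e 'puts rand(2**512).to_s(36)'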
-production:
- # Mandatory site secrets. See application.default.yml for more info.
+development:
+ # Separate settings for development configuration.
+ uuid_prefix: ~
secret_token: ~
blob_signing_key: ~
- uuid_prefix: bogus
- workbench_address: https://workbench.bogus.arvadosapi.com
- sso_app_id: arvados-server
+ sso_app_id: ~
sso_app_secret: ~
- sso_provider_url: https://login.bogus.arvadosapi.com
+ sso_provider_url: ~
+ workbench_address: ~
+ websockets_address: ~
+ #git_repositories_dir: ~
+ #git_internal_dir: ~
test:
# Tests should be able to run without further configuration, but if you do
+++ /dev/null
-Server::Application.configure do
- config.accept_api_token = { 'foobar' => true }
-end
# Execute query and actually send the matching log rows
count = 0
- limit = 20
+ limit = 10
logs.limit(limit).each do |l|
ws.send(l.as_api_response.to_json)
# Number of rows returned was capped by limit(), we need to schedule
# another query to get more logs (will start from last_log_id
# reported by current query)
- EventMachine::schedule do
+ EventMachine::next_tick do
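+ # (next_tick defers the follow-up query to the next reactor
+ # iteration, so other connections are serviced between batches.)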
push_events ws, nil
end
elsif !notify_id.nil? and (ws.last_log_id.nil? or notify_id > ws.last_log_id)
# No filters set up, so just record the sequence number
ws.last_log_id = notify_id
end
+ rescue ArgumentError => e
+ # There was some kind of user error.
+ Rails.logger.warn "Error publishing event: #{$!}"
+ ws.send ({status: 500, message: $!}.to_json)
+ ws.close
rescue => e
Rails.logger.warn "Error publishing event: #{$!}"
Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- ws.send ({status: 500, message: 'error'}.to_json)
+ ws.send ({status: 500, message: $!}.to_json)
ws.close
# These exceptions typically indicate serious server trouble:
# out of memory issues, database connection problems, etc. Go ahead and
head_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
properties: {}
+project_viewer_member_of_all_users_group:
+ uuid: zzzzz-o0j2j-cdnq6627g0h0r2x
+ owner_uuid: zzzzz-tpzed-000000000000000
+ created_at: 2015-07-28T21:34:41.361747000Z
+ modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+ modified_by_user_uuid: zzzzz-tpzed-000000000000000
+ modified_at: 2015-07-28T21:34:41.361747000Z
+ updated_at: 2015-07-28T21:34:41.361747000Z
+ tail_uuid: zzzzz-tpzed-projectviewer1a
+ link_class: permission
+ name: can_read
+ head_uuid: zzzzz-j7d0g-fffffffffffffff
+ properties: {}
+
project_viewer_can_read_project:
uuid: zzzzz-o0j2j-projviewerreadp
owner_uuid: zzzzz-tpzed-000000000000000
end
end
+ test "get_all_permissions takes into account is_active flag" do
+ r = nil
+ act_as_user users(:active) do
+ r = Repository.create! name: 'active/testrepo'
+ end
+ act_as_system_user do
+ u = users(:active)
+ u.is_active = false
+ u.save!
+ end
+ authorize_with :admin
+ get :get_all_permissions
+ assert_response :success
+ json_response['repositories'].each do |r|
+ r['user_permissions'].each do |user_uuid, perms|
+ refute_equal user_uuid, users(:active).uuid
+ end
+ end
+ end
+
test "get_all_permissions does not give any access to user without permission" do
viewer_uuid = users(:project_viewer).uuid
assert_equal(authorized_keys(:project_viewer).authorized_user_uuid,
end
end
- test "get_all_permissions lists repos with no authorized keys" do
+ test "get_all_permissions lists all repos regardless of permissions" do
+ act_as_system_user do
+ # Create repos that could potentially be left out of the
+ # permission list by accident.
+
+ # No authorized_key, no username (this can't even be done
+ # without skipping validations)
+ r = Repository.create name: 'root/testrepo'
+ assert r.save validate: false
+
+ r = Repository.create name: 'invalid username / repo name', owner_uuid: users(:inactive).uuid
+ assert r.save validate: false
+ end
+ authorize_with :admin
+ get :get_all_permissions
+ assert_response :success
+ assert_equal(Repository.count, json_response["repositories"].size)
+ end
+
+ test "get_all_permissions lists user permissions for users with no authorized keys" do
authorize_with :admin
AuthorizedKey.destroy_all
get :get_all_permissions
assert_response :success
assert_equal(Repository.count, json_response["repositories"].size)
- assert(json_response["repositories"].any? do |repo|
- repo["user_permissions"].empty?
- end, "test is invalid - all repositories have authorized keys")
+ repos_with_perms = []
+ json_response['repositories'].each do |repo|
+ if repo['user_permissions'].any?
+ repos_with_perms << repo['uuid']
+ end
+ end
+ assert_not_empty repos_with_perms, 'permissions are missing'
+ end
+
+ # Ensure get_all_permissions correctly describes what the normal
+ # permission system would do.
+ test "get_all_permissions obeys group permissions" do
+ act_as_user system_user do
+ r = Repository.create!(name: 'admin/groupcanwrite', owner_uuid: users(:admin).uuid)
+ g = Group.create!(group_class: 'group', name: 'repo-writers')
+ u1 = users(:active)
+ u2 = users(:spectator)
+ Link.create!(tail_uuid: g.uuid, head_uuid: r.uuid, link_class: 'permission', name: 'can_manage')
+ Link.create!(tail_uuid: u1.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_write')
+ Link.create!(tail_uuid: u2.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_read')
+
+ r = Repository.create!(name: 'admin/groupreadonly', owner_uuid: users(:admin).uuid)
+ g = Group.create!(group_class: 'group', name: 'repo-readers')
+ u1 = users(:active)
+ u2 = users(:spectator)
+ Link.create!(tail_uuid: g.uuid, head_uuid: r.uuid, link_class: 'permission', name: 'can_read')
+ Link.create!(tail_uuid: u1.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_write')
+ Link.create!(tail_uuid: u2.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_read')
+ end
+ authorize_with :admin
+ get :get_all_permissions
+ assert_response :success
+ json_response['repositories'].each do |repo|
+ repo['user_permissions'].each do |user_uuid, perms|
+ u = User.find_by_uuid(user_uuid)
+ if perms['can_read']
+ assert u.can? read: repo['uuid']
+ assert_match /R/, perms['gitolite_permissions']
+ else
+ refute_match /R/, perms['gitolite_permissions']
+ end
+ if perms['can_write']
+ assert u.can? write: repo['uuid']
+ assert_match /RW/, perms['gitolite_permissions']
+ else
+ refute_match /W/, perms['gitolite_permissions']
+ end
+ if perms['can_manage']
+ assert u.can? manage: repo['uuid']
+ assert_match /RW/, perms['gitolite_permissions']
+ end
+ end
+ end
end
test "default index includes fetch_url" do
assert_empty(json_response.
select { |login| login["user_uuid"] == spectator_uuid })
end
+
+ test "logins without ssh keys are listed" do
+ u, vm = nil
+ act_as_system_user do
+ u = create :active_user, first_name: 'Bob', last_name: 'Blogin'
+ vm = VirtualMachine.create! hostname: 'foo.shell'
+ Link.create!(tail_uuid: u.uuid,
+ head_uuid: vm.uuid,
+ link_class: 'permission',
+ name: 'can_login',
+ properties: {'username' => 'bobblogin'})
+ end
+ authorize_with :admin
+ get :logins, id: vm.uuid
+ assert_response :success
+ assert_equal 1, json_response['items'].length
+ assert_equal nil, json_response['items'][0]['public_key']
+ assert_equal nil, json_response['items'][0]['authorized_key_uuid']
+ assert_equal u.uuid, json_response['items'][0]['user_uuid']
+ assert_equal 'bobblogin', json_response['items'][0]['username']
+ end
end
assert_equal 200, status
end
- test "connect, subscribe, get event" do
+ def subscribe_test
state = 1
spec = nil
ev_uuid = nil
assert_equal spec.uuid, ev_uuid
end
+ test "connect, subscribe, get event" do
+ subscribe_test()
+ end
+
test "connect, subscribe, get two events" do
state = 1
spec = nil
end
+ test "connect, subscribe with invalid filter" do
+ state = 1
+ human = nil
+ human_ev_uuid = nil
+
+ authorize_with :admin
+
+ ws_helper :admin do |ws|
+ ws.on :open do |event|
+ # test that #6451 is fixed (invalid filter crashes websockets)
+ ws.send ({method: 'subscribe', filters: [['object_blarg', 'is_a', 'arvados#human']]}.to_json)
+ end
+
+ ws.on :message do |event|
+ d = Oj.load event.data
+ case state
+ when 1
+ assert_equal 200, d["status"]
+ Specimen.create
+ human = Human.create
+ state = 2
+ when 2
+ assert_equal 500, d["status"]
+ state = 3
+ ws.close
+ when 3
+ assert false, "Should not get any more events"
+ end
+ end
+
+ end
+
+ assert_equal 3, state
+
+ # Try connecting again to ensure that the websockets server is still
+ # running and didn't crash (#6451).
+ subscribe_test()
+
+ end
+
+
end
require 'test_helper'
class AuthorizedKeyTest < ActiveSupport::TestCase
- # test "the truth" do
- # assert true
- # end
+ TEST_KEY = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCf5aTI55uyWr44TckP/ELUAyPsdnf5fTZDcSDN4qiMZYAL7TYV2ixwnbPObLObM0GmHSSFLV1KqsuFICUPgkyKoHbAH6XPgmtfOLU60VkGf1v5uxQ/kXCECRCJmPb3K9dIXGEw+1DXPdOV/xG7rJNvo4a9WK9iqqZr8p+VGKM6C017b8BDLk0tuEEjZ5jXcT/ka/hTScxWkKgF6auPOVQ79OA5+0VaYm4uQLzVUdgwVUPWQQecRrtnc08XYM1htpcLDIAbWfUNK7uE6XR3/OhtrJGf05FGbtGguPgi33F9W3Q3yw6saOK5Y3TfLbskgFaEdLgzqK/QSBRk2zBF49Tj test@localhost"
+
+ test 'create and update key' do
+ u1 = users(:active)
+ act_as_user u1 do
+ ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY, authorized_user_uuid: u1.uuid)
+ assert ak.save, ak.errors.full_messages.to_s
+ ak.name = "bar"
+ assert ak.valid?, ak.errors.full_messages.to_s
+ assert ak.save, ak.errors.full_messages.to_s
+ end
+ end
+
+ test 'duplicate key not permitted' do
+ u1 = users(:active)
+ act_as_user u1 do
+ ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY, authorized_user_uuid: u1.uuid)
+ assert ak.save
+ end
+ u2 = users(:spectator)
+ act_as_user u2 do
+ ak2 = AuthorizedKey.new(name: "bar", public_key: TEST_KEY, authorized_user_uuid: u2.uuid)
+ refute ak2.valid?
+ refute ak2.save
+ assert_match /already exists/, ak2.errors.full_messages.to_s
+ end
+ end
+
+ test 'attach key to wrong user account' do
+ act_as_user users(:active) do
+ ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY)
+ ak.authorized_user_uuid = users(:spectator).uuid
+ refute ak.save
+ ak.uuid = nil
+ ak.authorized_user_uuid = users(:admin).uuid
+ refute ak.save
+ ak.uuid = nil
+ ak.authorized_user_uuid = users(:active).uuid
+ assert ak.save, ak.errors.full_messages.to_s
+ ak.authorized_user_uuid = users(:admin).uuid
+ refute ak.save
+ end
+ end
end
self._total -= obj.cache_size
del self._entries[obj.cache_priority]
if obj.cache_uuid:
- del self._by_uuid[obj.cache_uuid]
+ self._by_uuid[obj.cache_uuid].remove(obj)
+ if not self._by_uuid[obj.cache_uuid]:
+ del self._by_uuid[obj.cache_uuid]
obj.cache_uuid = None
if clear:
_logger.debug("InodeCache cleared %i total now %i", obj.inode, self._total)
self._entries[obj.cache_priority] = obj
obj.cache_uuid = obj.uuid()
if obj.cache_uuid:
- self._by_uuid[obj.cache_uuid] = obj
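+ # More than one inode may refer to the same uuid (e.g. a collection
+ # reachable through several directories), so _by_uuid maps each uuid
+ # to a list of cache entries rather than a single object.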
+ if obj.cache_uuid not in self._by_uuid:
+ self._by_uuid[obj.cache_uuid] = [obj]
+ else:
+ if obj not in self._by_uuid[obj.cache_uuid]:
+ self._by_uuid[obj.cache_uuid].append(obj)
self._total += obj.objsize()
- _logger.debug("InodeCache touched %i (size %i) total now %i", obj.inode, obj.objsize(), self._total)
+ _logger.debug("InodeCache touched %i (size %i) (uuid %s) total now %i", obj.inode, obj.objsize(), obj.cache_uuid, self._total)
self.cap_cache()
else:
obj.cache_priority = None
def find(self, uuid):
return self._by_uuid.get(uuid)
+ def clear(self):
+ self._entries.clear()
+ self._by_uuid.clear()
+ self._total = 0
+
class Inodes(object):
"""Manage the set of inodes. This is the mapping from a numeric id
to a concrete File or Directory object"""
def invalidate_entry(self, inode, name):
llfuse.invalidate_entry(inode, name)
+ def clear(self):
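+ # Release every inode: drop the lookup cache, finalize each entry,
+ # then empty the inode table.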
+ self.inode_cache.clear()
+
+ for k,v in self._entries.items():
+ try:
+ v.finalize()
+ except Exception as e:
+ _logger.exception("Error during finalize of inode %i", k)
+
+ self._entries.clear()
+
def catch_exceptions(orig_func):
"""Catch uncaught exceptions and log them consistently."""
self.events.close()
self.events = None
- for k,v in self.inodes.items():
- try:
- v.finalize()
- except Exception as e:
- _logger.exception("Error during finalize of inode %i", k)
- self.inodes = None
+ self.inodes.clear()
def access(self, inode, mode, ctx):
return True
def on_event(self, ev):
if 'event_type' in ev:
with llfuse.lock:
- item = self.inodes.inode_cache.find(ev["object_uuid"])
- if item is not None:
- item.invalidate()
- if ev["object_kind"] == "arvados#collection":
- new_attr = ev.get("properties") and ev["properties"].get("new_attributes") and ev["properties"]["new_attributes"]
-
- # new_attributes.modified_at currently lacks subsecond precision (see #6347) so use event_at which
- # should always be the same.
- #record_version = (new_attr["modified_at"], new_attr["portable_data_hash"]) if new_attr else None
- record_version = (ev["event_at"], new_attr["portable_data_hash"]) if new_attr else None
-
- item.update(to_record_version=record_version)
- else:
- item.update()
+ items = self.inodes.inode_cache.find(ev["object_uuid"])
+ if items is not None:
+ for item in items:
+ item.invalidate()
+ if ev["object_kind"] == "arvados#collection":
+ new_attr = ev.get("properties") and ev["properties"].get("new_attributes") and ev["properties"]["new_attributes"]
+
+ # new_attributes.modified_at currently lacks subsecond precision (see #6347) so use event_at which
+ # should always be the same.
+ #record_version = (new_attr["modified_at"], new_attr["portable_data_hash"]) if new_attr else None
+ record_version = (ev["event_at"], new_attr["portable_data_hash"]) if new_attr else None
+
+ item.update(to_record_version=record_version)
+ else:
+ item.update()
oldowner = ev.get("properties") and ev["properties"].get("old_attributes") and ev["properties"]["old_attributes"].get("owner_uuid")
olditemparent = self.inodes.inode_cache.find(oldowner)
return True
finally:
self._updating_lock.release()
- except arvados.errors.NotFoundError:
- _logger.exception("arv-mount %s: error", self.collection_locator)
+ except arvados.errors.NotFoundError as e:
+ _logger.error("Error fetching collection '%s': %s", self.collection_locator, e)
except arvados.errors.ArgumentError as detail:
_logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
if self.collection_record is not None and "manifest_text" in self.collection_record:
self.inode, self.inodes, self.api, self.num_retries, k))
if e.update():
- self._entries[k] = e
+ if k not in self._entries:
+ self._entries[k] = e
+ else:
+ self.inodes.del_entry(e)
return True
else:
+ self.inodes.del_entry(e)
return False
except Exception as ex:
_logger.debug('arv-mount exception keep %s', ex)
+ self.inodes.del_entry(e)
return False
def __getitem__(self, item):
--- /dev/null
+import arvados
+import arvados.safeapi
+import arvados_fuse as fuse
+import llfuse
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+import unittest
+import logging
+import multiprocessing
+import run_test_server
+
+logger = logging.getLogger('arvados.arv-mount')
+
+class MountTestBase(unittest.TestCase):
+ def setUp(self, api=None):
+ # The underlying C implementation of open() makes a fstat() syscall
+ # with the GIL still held. When the GETATTR message comes back to
+ # llfuse (which in these tests is in the same interpreter process) it
+ # can't acquire the GIL, so it can't service the fstat() call, so it
+ # deadlocks. The workaround is to run some of our test code in a
+ # separate process. Fortunately the multiprocessing module makes this
+ # relatively easy.
+ self.pool = multiprocessing.Pool(1)
+
+ self.keeptmp = tempfile.mkdtemp()
+ os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
+ self.mounttmp = tempfile.mkdtemp()
+ run_test_server.run()
+ run_test_server.authorize_with("admin")
+ self.api = api if api else arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+
+ def make_mount(self, root_class, **root_kwargs):
+ self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
+ self.operations.inodes.add_entry(root_class(
+ llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
+ llfuse.init(self.operations, self.mounttmp, [])
+ threading.Thread(None, llfuse.main).start()
+ # wait until the driver is finished initializing
+ self.operations.initlock.wait()
+ return self.operations.inodes[llfuse.ROOT_INODE]
+
+ def tearDown(self):
+ self.pool.terminate()
+ self.pool.join()
+ del self.pool
+
+ # llfuse.close is buggy, so use fusermount instead.
+ #llfuse.close(unmount=True)
+
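+ # fusermount can fail transiently while the filesystem is still
+ # busy, so retry the unmount a few times before giving up.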
+ count = 0
+ success = 1
+ while (count < 9 and success != 0):
+ success = subprocess.call(["fusermount", "-u", self.mounttmp])
+ time.sleep(0.1)
+ count += 1
+
+ self.operations.destroy()
+
+ os.rmdir(self.mounttmp)
+ shutil.rmtree(self.keeptmp)
+ run_test_server.reset()
+
+ def assertDirContents(self, subdir, expect_content):
+ path = self.mounttmp
+ if subdir:
+ path = os.path.join(path, subdir)
+ self.assertEqual(sorted(expect_content), sorted(llfuse.listdir(path)))
--- /dev/null
+import arvados
+import arvados_fuse as fuse
+import llfuse
+import logging
+import os
+import sys
+import unittest
+from .. import run_test_server
+from ..mount_test_base import MountTestBase
+
+logger = logging.getLogger('arvados.arv-mount')
+
+from performance_profiler import profiled
+
+def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.createCollectionWithMultipleBlocks()
+
+ @profiled
+ def createCollectionWithMultipleBlocks(self):
+ for i in range(0, streams):
+ os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
+
+ # Create files
+ for j in range(0, files_per_stream):
+ with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+ f.write(data)
+
+ Test().runTest()
+
+def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.readContentsFromCollectionWithMultipleBlocks()
+
+ @profiled
+ def readContentsFromCollectionWithMultipleBlocks(self):
+ for i in range(0, streams):
+ d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+ for j in range(0, files_per_stream):
+ with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+ self.assertEqual(data, f.read())
+
+ Test().runTest()
+
+def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.moveFileFromCollectionWithMultipleBlocks()
+
+ @profiled
+ def moveFileFromCollectionWithMultipleBlocks(self):
+ d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+ self.assertIn(filename, d1)
+
+ os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+ d1 = llfuse.listdir(os.path.join(mounttmp))
+ self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+ d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+ self.assertNotIn(filename, d1)
+
+ Test().runTest()
+
+def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.deleteFileFromCollectionWithMultipleBlocks()
+
+ @profiled
+ def deleteFileFromCollectionWithMultipleBlocks(self):
+ os.remove(os.path.join(mounttmp, stream, filename))
+
+ Test().runTest()
+
+# Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
+class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
+ def setUp(self):
+ super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()
+
+ def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
+ collection = arvados.collection.Collection(api_client=self.api)
+ collection.save_new()
+
+ m = self.make_mount(fuse.CollectionDirectory)
+ with llfuse.lock:
+ m.new_collection(collection.api_response(), collection)
+ self.assertTrue(m.writable())
+
+ streams = 2
+ files_per_stream = 3
+ blocks_per_file = 2
+ bytes_per_block = 2**26
+
+ data = 'x' * blocks_per_file * bytes_per_block
+
+ self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
+
+ collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+ for i in range(0, streams):
+ self.assertIn('./stream' + str(i), collection2["manifest_text"])
+
+ for i in range(0, files_per_stream):
+ self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
+
+ # Read file contents
+ self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
+
+ # Move file0.txt out of the streams into .
+ for i in range(0, streams):
+ self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+
+ collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+ manifest_streams = collection2['manifest_text'].split('\n')
+ self.assertEqual(4, len(manifest_streams))
+
+ for i in range(0, streams):
+ self.assertIn('file0.txt', manifest_streams[0])
+
+ for i in range(0, streams):
+ self.assertNotIn('file0.txt', manifest_streams[i+1])
+
+ for i in range(0, streams):
+ for j in range(1, files_per_stream):
+ self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+ # Delete 'file1.txt' from all the streams
+ for i in range(0, streams):
+ self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+
+ collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+ manifest_streams = collection2['manifest_text'].split('\n')
+ self.assertEqual(4, len(manifest_streams))
+
+ for i in range(0, streams):
+ self.assertIn('file0.txt', manifest_streams[0])
+
+ self.assertNotIn('file1.txt', collection2['manifest_text'])
+
+ for i in range(0, streams):
+ for j in range(2, files_per_stream):
+ self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+
+def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.createCollectionWithManyFiles()
+
+ @profiled
+ def createCollectionWithManyFiles(self):
+ for i in range(0, streams):
+ os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
+
+ # Create files
+ for j in range(0, files_per_stream):
+ with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+ f.write(data)
+
+ Test().runTest()
+
+def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.readContentsFromCollectionWithManyFiles()
+
+ @profiled
+ def readContentsFromCollectionWithManyFiles(self):
+ for i in range(0, streams):
+ d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+ for j in range(0, files_per_stream):
+ with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+ self.assertEqual(data, f.read())
+
+ Test().runTest()
+
+def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.moveFileFromCollectionWithManyFiles()
+
+ @profiled
+ def moveFileFromCollectionWithManyFiles(self):
+ d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+ self.assertIn(filename, d1)
+
+ os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+ d1 = llfuse.listdir(os.path.join(mounttmp))
+ self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+ d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+ self.assertNotIn(filename, d1)
+
+ Test().runTest()
+
+def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.deleteFileFromCollectionWithManyFiles()
+
+ @profiled
+ def deleteFileFromCollectionWithManyFiles(self):
+ os.remove(os.path.join(mounttmp, stream, filename))
+
+ Test().runTest()
+
+# Create a collection with two streams, each with 200 files
+class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+ def setUp(self):
+ super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+ def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
+ collection = arvados.collection.Collection(api_client=self.api)
+ collection.save_new()
+
+ m = self.make_mount(fuse.CollectionDirectory)
+ with llfuse.lock:
+ m.new_collection(collection.api_response(), collection)
+ self.assertTrue(m.writable())
+
+ streams = 2
+ files_per_stream = 200
+ data = 'x'
+
+ self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+
+ collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+ for i in range(0, streams):
+ self.assertIn('./stream' + str(i), collection2["manifest_text"])
+
+ for i in range(0, files_per_stream):
+ self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
+
+ # Read file contents
+ self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+
+ # Move file0.txt out of the streams into .
+ for i in range(0, streams):
+ self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+
+ collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+ manifest_streams = collection2['manifest_text'].split('\n')
+ self.assertEqual(4, len(manifest_streams))
+
+ for i in range(0, streams):
+ self.assertIn('file0.txt', manifest_streams[0])
+
+ for i in range(0, streams):
+ self.assertNotIn('file0.txt', manifest_streams[i+1])
+
+ for i in range(0, streams):
+ for j in range(1, files_per_stream):
+ self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+ # Delete 'file1.txt' from all the streams
+ for i in range(0, streams):
+ self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+
+ collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+ manifest_streams = collection2['manifest_text'].split('\n')
+ self.assertEqual(4, len(manifest_streams))
+
+ for i in range(0, streams):
+ self.assertIn('file0.txt', manifest_streams[0])
+
+ self.assertNotIn('file1.txt', collection2['manifest_text'])
+
+ for i in range(0, streams):
+ for j in range(2, files_per_stream):
+ self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+
+def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.magicDirTest_moveFileFromCollection()
+
+ @profiled
+ def magicDirTest_moveFileFromCollection(self):
+ os.rename(os.path.join(mounttmp, collection1, filename), os.path.join(mounttmp, collection2, filename))
+
+ Test().runTest()
+
+def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.magicDirTest_removeFileFromCollection()
+
+ @profiled
+ def magicDirTest_removeFileFromCollection(self):
+ os.remove(os.path.join(mounttmp, collection1, filename))
+
+ Test().runTest()
+
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+ def setUp(self):
+ super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+ @profiled
+ def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
+ # Create collection
+ collection = arvados.collection.Collection(api_client=self.api)
+ for j in range(0, files_per_stream):
+ with collection.open("file"+str(j)+".txt", "w") as f:
+ f.write(data)
+ collection.save_new()
+ return collection
+
+ @profiled
+ def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
+ mount_ls = os.listdir(os.path.join(self.mounttmp, collection))
+
+ files = {}
+ for j in range(0, files_per_stream):
+ files[os.path.join(self.mounttmp, collection, 'file'+str(j)+'.txt')] = data
+
+ for k, v in files.items():
+ with open(k) as f:
+ self.assertEqual(v, f.read())
+
+ def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
+ streams = 2
+ files_per_stream = 200
+ data = 'x'
+
+ collection1 = self.magicDirTest_createCollectionWithManyFiles()
+ # Create collection with multiple files
+ collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+ # Mount FuseMagicDir
+ self.make_mount(fuse.MagicDirectory)
+
+ self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)
+
+ # Move file0.txt out of the collection2 into collection1
+ self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
+ collection1.manifest_locator(), 'stream0', 'file0.txt',))
+ updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+ self.assertFalse('file0.txt' in updated_collection['manifest_text'])
+ self.assertTrue('file1.txt' in updated_collection['manifest_text'])
+
+ # Delete file1.txt from collection2
+ self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
+ updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+ self.assertFalse('file1.txt' in updated_collection['manifest_text'])
+ self.assertTrue('file2.txt' in updated_collection['manifest_text'])
+
+
+def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
+ class Test(unittest.TestCase):
+ def runTest(self):
+ self.magicDirTest_moveAllFilesFromCollection()
+
+ @profiled
+ def magicDirTest_moveAllFilesFromCollection(self):
+ for j in range(0, files_per_stream):
+ os.rename(os.path.join(mounttmp, from_collection, 'file'+str(j)+'.txt'), os.path.join(mounttmp, to_collection, 'file'+str(j)+'.txt'))
+
+ Test().runTest()
+
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
+ def setUp(self):
+ super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()
+
+ @profiled
+ def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
+ blocks_per_file=0, bytes_per_block=0, data='x'):
+ # Create collection
+ collection = arvados.collection.Collection(api_client=self.api)
+ for j in range(0, files_per_stream):
+ with collection.open("file"+str(j)+".txt", "w") as f:
+ f.write(data)
+ collection.save_new()
+ return collection
+
+ def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
+ streams = 2
+ files_per_stream = 200
+ data = 'x'
+
+ collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
+ # Create collection with multiple files
+ collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+ # Mount FuseMagicDir
+ self.make_mount(fuse.MagicDirectory)
+
+ # Move all files from collection2 into collection1
+ self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
+ collection1.manifest_locator(), 'stream0', files_per_stream,))
+
+ updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+ file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
+ for name in file_names:
+ self.assertFalse(name in updated_collection['manifest_text'])
+
+ updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
+ for name in file_names:
+ self.assertTrue(name in updated_collection['manifest_text'])
+
+
+# Move one file at a time from one collection into another
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
+ def setUp(self):
+ super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()
+
+ @profiled
+ def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
+ # Create collection
+ collection = arvados.collection.Collection(api_client=self.api)
+ for j in range(0, files_per_stream):
+ with collection.open("file"+str(j)+".txt", "w") as f:
+ f.write(data)
+ collection.save_new()
+ return collection
+
+ def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
+ for j in range(0, files_per_stream):
+ self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
+ to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
+
+ def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
+ streams = 2
+ files_per_stream = 200
+ data = 'x'
+
+ collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
+ # Create collection with multiple files
+ collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+ # Mount FuseMagicDir
+ self.make_mount(fuse.MagicDirectory)
+
+ # Move all files from collection2 into collection1
+ self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)
+
+ updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+ file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
+ for name in file_names:
+ self.assertFalse(name in updated_collection['manifest_text'])
+
+ updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
+ for name in file_names:
+ self.assertTrue(name in updated_collection['manifest_text'])
+
+class FuseListLargeProjectContents(MountTestBase):
+ @profiled
+ def getProjectWithManyCollections(self):
+ project_contents = llfuse.listdir(self.mounttmp)
+ self.assertEqual(201, len(project_contents))
+ self.assertIn('Collection_1', project_contents)
+ return project_contents
+
+ @profiled
+ def listContentsInProjectWithManyCollections(self, project_contents):
+ project_contents = llfuse.listdir(self.mounttmp)
+ self.assertEqual(201, len(project_contents))
+ self.assertIn('Collection_1', project_contents)
+
+ for collection_name in project_contents:
+ collection_contents = llfuse.listdir(os.path.join(self.mounttmp, collection_name))
+ self.assertIn('baz', collection_contents)
+
+ def test_listLargeProjectContents(self):
+ self.make_mount(fuse.ProjectDirectory,
+ project_object=run_test_server.fixture('groups')['project_with_201_collections'])
+ project_contents = self.getProjectWithManyCollections()
+ self.listContentsInProjectWithManyCollections(project_contents)
import logging
import multiprocessing
import run_test_server
+import mock
-logger = logging.getLogger('arvados.arv-mount')
+from mount_test_base import MountTestBase
-class MountTestBase(unittest.TestCase):
- def setUp(self):
- # The underlying C implementation of open() makes a fstat() syscall
- # with the GIL still held. When the GETATTR message comes back to
- # llfuse (which in these tests is in the same interpreter process) it
- # can't acquire the GIL, so it can't service the fstat() call, so it
- # deadlocks. The workaround is to run some of our test code in a
- # separate process. Forturnately the multiprocessing module makes this
- # relatively easy.
- self.pool = multiprocessing.Pool(1)
-
- self.keeptmp = tempfile.mkdtemp()
- os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
- self.mounttmp = tempfile.mkdtemp()
- run_test_server.run()
- run_test_server.authorize_with("admin")
- self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
-
- def make_mount(self, root_class, **root_kwargs):
- self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
- self.operations.inodes.add_entry(root_class(
- llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
- llfuse.init(self.operations, self.mounttmp, [])
- threading.Thread(None, llfuse.main).start()
- # wait until the driver is finished initializing
- self.operations.initlock.wait()
- return self.operations.inodes[llfuse.ROOT_INODE]
-
- def tearDown(self):
- self.pool.terminate()
- self.pool.join()
- del self.pool
-
- # llfuse.close is buggy, so use fusermount instead.
- #llfuse.close(unmount=True)
-
- count = 0
- success = 1
- while (count < 9 and success != 0):
- success = subprocess.call(["fusermount", "-u", self.mounttmp])
- time.sleep(0.1)
- count += 1
-
- self.operations.destroy()
-
- os.rmdir(self.mounttmp)
- shutil.rmtree(self.keeptmp)
- run_test_server.reset()
-
- def assertDirContents(self, subdir, expect_content):
- path = self.mounttmp
- if subdir:
- path = os.path.join(path, subdir)
- self.assertEqual(sorted(expect_content), sorted(llfuse.listdir(path)))
+logger = logging.getLogger('arvados.arv-mount')
class FuseMountTest(MountTestBase):
class FuseMagicTest(MountTestBase):
- def setUp(self):
- super(FuseMagicTest, self).setUp()
+ def setUp(self, api=None):
+ super(FuseMagicTest, self).setUp(api=api)
cw = arvados.CollectionWriter()
cw.write("data 1")
self.testcollection = cw.finish()
- self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
+ self.test_manifest = cw.manifest_text()
+ self.api.collections().create(body={"manifest_text":self.test_manifest}).execute()
def runTest(self):
self.make_mount(fuse.MagicDirectory)
self.pool.apply(fuseFsyncTestHelper, (self.mounttmp, self.testcollection))
+class MagicDirApiError(FuseMagicTest):
+ def setUp(self):
+ api = mock.MagicMock()
+ super(MagicDirApiError, self).setUp(api=api)
+ api.collections().get().execute.side_effect = iter([Exception('API fail'), {"manifest_text": self.test_manifest}])
+ api.keep.get.side_effect = Exception('Keep fail')
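+ # The first collections().get().execute call raises, so the first
+ # listdir in runTest fails with OSError; the second call returns the
+ # saved manifest and the retried listdir succeeds.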
+
+ def runTest(self):
+ self.make_mount(fuse.MagicDirectory)
+
+ self.operations.inodes.inode_cache.cap = 1
+ self.operations.inodes.inode_cache.min_entries = 2
+
+ with self.assertRaises(OSError):
+ llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
+
+ llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
+
+
class FuseUnitTest(unittest.TestCase):
def test_sanitize_filename(self):
acceptable = [