Merge branch '6221-write-trash-list'
author     Brett Smith <brett@curoverse.com>
           Mon, 3 Aug 2015 16:08:52 +0000 (12:08 -0400)
committer  Brett Smith <brett@curoverse.com>
           Mon, 3 Aug 2015 16:10:19 +0000 (12:10 -0400)
Closes #6221, #6673.

103 files changed:
apps/workbench/Gemfile.lock
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/helpers/pipeline_instances_helper.rb
apps/workbench/app/views/pipeline_instances/_running_component.html.erb
apps/workbench/app/views/pipeline_instances/_show_components_running.html.erb
apps/workbench/app/views/users/_manage_repositories.html.erb
apps/workbench/app/views/users/_manage_virtual_machines.html.erb
apps/workbench/app/views/virtual_machines/webshell.html.erb
apps/workbench/config/load_config.rb
apps/workbench/test/controllers/application_controller_test.rb
apps/workbench/test/controllers/collections_controller_test.rb
apps/workbench/test/controllers/jobs_controller_test.rb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/controllers/users_controller_test.rb
apps/workbench/test/helpers/collections_helper_test.rb
apps/workbench/test/integration/anonymous_access_test.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/collection_upload_test.rb
apps/workbench/test/integration/collections_test.rb
apps/workbench/test/integration/pipeline_instances_test.rb
apps/workbench/test/integration/projects_test.rb
apps/workbench/test/integration/user_manage_account_test.rb
apps/workbench/test/integration/users_test.rb
doc/_config.yml
doc/_includes/_arv_copy_expectations.liquid [new file with mode: 0644]
doc/_includes/_install_git_curl.liquid [new file with mode: 0644]
doc/_includes/_install_postgres.liquid [new file with mode: 0644]
doc/_includes/_install_ruby_and_bundler.liquid [new file with mode: 0644]
doc/_includes/_tutorial_expectations.liquid
doc/images/add-new-repository.png
doc/images/added-new-repository.png [new file with mode: 0644]
doc/images/api-token-host.png [new file with mode: 0644]
doc/images/vm-access-with-webshell.png [new file with mode: 0644]
doc/install/install-api-server.html.textile.liquid
doc/install/install-arv-git-httpd.html.textile.liquid
doc/install/install-compute-node.html.textile.liquid
doc/install/install-docker.html.textile.liquid
doc/install/install-manual-prerequisites-ruby.html.textile.liquid [deleted file]
doc/install/install-manual-prerequisites.html.textile.liquid
doc/install/install-shell-server.html.textile.liquid
doc/install/install-sso.html.textile.liquid
doc/install/install-workbench-app.html.textile.liquid
doc/sdk/cli/install.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/user/getting_started/check-environment.html.textile.liquid
doc/user/getting_started/ssh-access-unix.html.textile.liquid
doc/user/getting_started/ssh-access-windows.html.textile.liquid
doc/user/getting_started/vm-login-with-webshell.html.textile.liquid [new file with mode: 0644]
doc/user/reference/api-tokens.html.textile.liquid
doc/user/topics/arv-copy.html.textile.liquid [new file with mode: 0644]
doc/user/topics/arv-run.html.textile.liquid
doc/user/tutorials/add-new-repository.html.textile.liquid
doc/user/tutorials/tutorial-submit-job.html.textile.liquid
docker/build_tools/Makefile
sdk/cli/bin/crunch-job
sdk/cli/test/binstub_clean_fail/mount [new file with mode: 0755]
sdk/cli/test/binstub_docker_noop/docker.io [new file with mode: 0755]
sdk/cli/test/binstub_sanity_check/docker.io [new file with mode: 0755]
sdk/cli/test/binstub_sanity_check/true [new file with mode: 0755]
sdk/cli/test/test_arv-collection-create.rb
sdk/cli/test/test_arv-get.rb
sdk/cli/test/test_arv-put.rb
sdk/cli/test/test_arv-run-pipeline-instance.rb
sdk/cli/test/test_arv-tag.rb
sdk/cli/test/test_crunch-job.rb [new file with mode: 0644]
sdk/cwl/.gitignore [new symlink]
sdk/cwl/README.rst [new file with mode: 0644]
sdk/cwl/arvados_cwl/__init__.py [new file with mode: 0644]
sdk/cwl/bin/cwl-runner [new file with mode: 0755]
sdk/cwl/gittaggers.py [new symlink]
sdk/cwl/setup.py [new file with mode: 0644]
sdk/perl/Makefile.PL
sdk/python/arvados/commands/arv_copy.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/commands/run.py
sdk/python/arvados/commands/ws.py
sdk/python/arvados/events.py
sdk/python/arvados/util.py
sdk/python/bin/arv-get
sdk/python/tests/run_test_server.py
sdk/python/tests/test_arv_ws.py [new file with mode: 0644]
sdk/python/tests/test_util.py
sdk/python/tests/test_websockets.py
services/api/app/controllers/arvados/v1/repositories_controller.rb
services/api/app/controllers/arvados/v1/virtual_machines_controller.rb
services/api/app/models/arvados_model.rb
services/api/app/models/authorized_key.rb
services/api/app/models/commit.rb
services/api/config/application.default.yml
services/api/config/application.yml.example
services/api/config/database.yml.example [moved from services/api/config/database.yml.sample with 100% similarity]
services/api/config/initializers/hardcoded_api_tokens.rb.example [deleted file]
services/api/lib/eventbus.rb
services/api/test/fixtures/links.yml
services/api/test/functional/arvados/v1/repositories_controller_test.rb
services/api/test/functional/arvados/v1/virtual_machines_controller_test.rb
services/api/test/integration/websocket_test.rb
services/api/test/unit/authorized_key_test.rb
services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/fusedir.py
services/fuse/tests/mount_test_base.py [new file with mode: 0644]
services/fuse/tests/performance/test_collection_performance.py [new file with mode: 0644]
services/fuse/tests/test_mount.py

index bc7ddaf7eda77c9a53df1a5c7e5839319b423042..20b8d6164ccca273e11756928a21c1a17851f07a 100644 (file)
@@ -294,6 +294,3 @@ DEPENDENCIES
   therubyracer
   uglifier (>= 1.0.3)
   wiselinks
-
-BUNDLED WITH
-   1.10.5
index db00be390a7e6846c2c8983efb90c2f213fd4336..db3d43040c416bef846a19d8ee0b4009a9e8f622 100644 (file)
@@ -707,6 +707,7 @@ class ApplicationController < ActionController::Base
   @@notification_tests = []
 
   @@notification_tests.push lambda { |controller, current_user|
+    return nil if Rails.configuration.shell_in_a_box_url
     AuthorizedKey.limit(1).where(authorized_user_uuid: current_user.uuid).each do
       return nil
     end
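The guard above suppresses Workbench's "set up an SSH public key" notification
whenever webshell is enabled. As a rough illustration (the hostname is a
placeholder; only the shell_in_a_box_url key comes from this change), webshell
would be enabled in Workbench's config/application.yml along these lines:

  common:
    shell_in_a_box_url: https://webshell.example.com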
index ba05f9e88cd7cd2f0345c993d5e60a7603e2bb8c..8fafbc2022d5873032d1f9565c2385a26f4a794b 100644 (file)
@@ -289,7 +289,7 @@ module PipelineInstancesHelper
     else
       s = ""
       if days > 0
-        s += "#{days}<span class='time-label-divider'>d</span> "
+        s += "#{days}<span class='time-label-divider'>d</span>"
       end
 
       if (hours > 0)
@@ -298,7 +298,7 @@ module PipelineInstancesHelper
 
       s += "#{minutes}<span class='time-label-divider'>m</span>"
 
-      if not round_to_min
+      if not round_to_min or (days == 0 and hours == 0 and minutes == 0)
         s += "#{seconds}<span class='time-label-divider'>s</span>"
       end
     end
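The round_to_min change means sub-minute durations still render their seconds
instead of showing a bare "0m". A minimal standalone sketch of the updated
logic (the method name, plain-text output, and divmod breakdown are
illustrative; the real helper emits HTML spans):

  def format_runtime(total_seconds, round_to_min)
    days, rem = total_seconds.divmod(86400)
    hours, rem = rem.divmod(3600)
    minutes, seconds = rem.divmod(60)
    s = ""
    s += "#{days}d" if days > 0
    s += "#{hours}h" if hours > 0
    s += "#{minutes}m"
    # Append seconds when rounding is off, or when the total duration
    # is under one minute (days == hours == minutes == 0).
    if not round_to_min or (days == 0 and hours == 0 and minutes == 0)
      s += "#{seconds}s"
    end
    s
  end

  format_runtime(42, true)    # => "0m42s"
  format_runtime(3725, true)  # => "1h2m"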
index 63075f7e66018ff675cfea93efecbd9aef5243ec..63a2371a1b3dd2dc398c9b672311bd606d64bdf7 100644 (file)
           <div class="col-md-3">
             <% if current_job[:started_at] %>
               <% walltime = ((if current_job[:finished_at] then current_job[:finished_at] else Time.now() end) - current_job[:started_at]) %>
-              <% cputime = tasks.map { |task|
-                   if task.started_at and task.job_uuid == current_job[:uuid]
-                      finished_at = task.finished_at || current_job[:finished_at] || Time.now()
-                      finished_at - task.started_at
-                   else
-                     0
-                   end
-                 }.reduce(:+) || 0 %>
-              <%= render_runtime(walltime, false, false) %>
-              <% if cputime > 0 %> / <%= render_runtime(cputime, false, false) %> (<%= (cputime/walltime).round(1) %>&Cross;)<% end %>
+              <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1) *
+                           ((current_job[:finished_at] || Time.now()) - current_job[:started_at]) %>
+              <%= render_runtime(walltime, false) %>
+              <% if cputime > 0 %> / <%= render_runtime(cputime, false) %> (<%= (cputime/walltime).round(1) %>&Cross;)<% end %>
             <% end %>
           </div>
           <% end %>
@@ -41,7 +35,7 @@
             <%# column offset 5 %>
             <div class="col-md-6">
               <% queuetime = Time.now - Time.parse(current_job[:created_at].to_s) %>
-              Queued for <%= render_runtime(queuetime, true) %>.
+              Queued for <%= render_runtime(queuetime, false) %>.
               <% begin %>
                 <% if current_job[:queue_position] == 0 %>
                   This job is next in the queue to run.
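With this change, the component's cputime is an estimate of node allocation
time — the job's wall clock multiplied by the number of nodes it requested —
rather than a sum over individual task runtimes. A hedged Ruby sketch of the
computation (sample values invented):

  require 'time'

  job = {
    started_at: Time.parse('2015-08-03 12:00:00 UTC'),
    finished_at: nil,                         # still running
    runtime_constraints: {min_nodes: 4},
  }

  walltime = (job[:finished_at] || Time.now) - job[:started_at]
  # Every requested node is counted for the whole time the job runs.
  cputime = (job[:runtime_constraints][:min_nodes] || 1) * walltime
  scaling = (cputime / walltime).round(1)     # => 4.0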
index b0b8601a0c326a0d03ac1cab12039299fa0ead3d..566e3d771e12796b8d8ebb7e273e34fe499def33 100644 (file)
@@ -12,7 +12,6 @@
 
 <% pipeline_jobs = render_pipeline_jobs %>
 <% job_uuids = pipeline_jobs.map { |j| j[:job].andand[:uuid] }.compact %>
-<% job_uuids_finished = {}; pipeline_jobs.map { |j| job_uuids_finished[j[:job].andand[:uuid]] = j[:job].andand[:finished_at] } %>
 
 <% if @object.state == 'Paused' %>
   <p>
@@ -21,7 +20,6 @@
   </p>
 <% end %>
 
-<% tasks = JobTask.filter([['job_uuid', 'in', job_uuids]]).results %>
 <% runningtime = determine_wallclock_runtime(pipeline_jobs.map {|j| j[:job]}.compact) %>
 
 <p>
@@ -43,9 +41,9 @@
                   end %>
 
     <%= if walltime > runningtime
-          render_runtime(walltime, true, false)
+          render_runtime(walltime, false)
         else
-          render_runtime(runningtime, true, false)
+          render_runtime(runningtime, false)
         end %><% if @object.finished_at %> at <%= render_localized_date(@object.finished_at) %><% end %>.
     <% else %>
       This pipeline is <%= if @object.state.start_with? 'Running' then 'active' else @object.state.downcase end %>.
       ran
     <% end %>
     for
-    <% cputime = tasks.map { |task|
-        if task.started_at
-          finished_at = task.finished_at || job_uuids_finished[task.job_uuid] || Time.now()
-          finished_at - task.started_at
+    <%
+        cputime = pipeline_jobs.map { |j|
+        if j[:job][:started_at]
+          (j[:job][:runtime_constraints].andand[:min_nodes] || 1) * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
         else
           0
         end
        }.reduce(:+) || 0 %>
-    <%= render_runtime(runningtime, true, false) %><% if (walltime - runningtime) > 0 %>
-      (<%= render_runtime(walltime - runningtime, true, false) %> queued)<% end %><% if cputime == 0 %>.<% else %>
+    <%= render_runtime(runningtime, false) %><% if (walltime - runningtime) > 0 %>
+      (<%= render_runtime(walltime - runningtime, false) %> queued)<% end %><% if cputime == 0 %>.<% else %>
       and used
-    <%= render_runtime(cputime, true, false) %>
-    of CPU time (<%= (cputime/runningtime).round(1) %>&Cross; scaling).
+    <%= render_runtime(cputime, false) %>
+    of node allocation time (<%= (cputime/runningtime).round(1) %>&Cross; scaling).
     <% end %>
 </p>
 
@@ -99,5 +97,5 @@
 %>
 
 <% pipeline_jobs.each_with_index do |pj, i| %>
-  <%= render partial: 'running_component', locals: {tasks: tasks, pj: pj, i: i, expanded: false} %>
+  <%= render partial: 'running_component', locals: {pj: pj, i: i, expanded: false} %>
 <% end %>
index bcf0d643198a7f33ccc8eaee48a3de2bb8692564..b616b6d6347fe2056d1f2666bbb060cac44ffb50 100644 (file)
@@ -46,6 +46,7 @@
               <%= writable ? 'writable' : 'read-only' %>
             </td>
             <td style="word-break:break-all;">
+              <code><%= repo.http_fetch_url %></code><br/>
               <code><%= writable ? repo.push_url : repo.fetch_url %></code>
             </td>
             <td>
index f635138dbb86322a8b9f5fd15482788a02ffc4f5..295311d33d62b848e7e47baab25a8662a7f6043c 100644 (file)
@@ -9,12 +9,9 @@
 
 <div id="manage_virtual_machines" class="panel-body">
   <p>
-    For more information see <%= link_to raw('Arvados Docs &rarr; User Guide &rarr; SSH access'),
-  "#{Rails.configuration.arvados_docsite}/user/getting_started/ssh-access-unix.html",
+    For more information see <%= link_to raw('Arvados Docs &rarr; User Guide &rarr; VM access'),
+  "#{Rails.configuration.arvados_docsite}/user/getting_started/vm-login-with-webshell.html",
   target: "_blank"%>.
-    <% if @my_virtual_machines.any? or true %>
-      A sample <code>~/.ssh/config</code> entry is provided below.
-    <% end %>
   </p>
 
   <% if !@my_virtual_machines.any? %>
       </tbody>
     </table>
 
-    <p><i>~/.ssh/config:</i></p>
+    <p>Sample SSH config entry for <i>~/.ssh/config</i>:</p>
     <pre>Host *.arvados
       TCPKeepAlive yes
       ServerAliveInterval 60
index 0f79d3f8f6e8aa941f54a1ed3b4ab26f0bb09dd6..a5507b377d106b712b99a216a5719a8614684a3b 100644 (file)
@@ -31,8 +31,8 @@
           setTimeout(function() {
             sh.keysPressed("<%= j Thread.current[:arvados_api_token] %>\n");
             sh.vt100('(sent authentication token)\n');
-          }, 1000);
-        }, 1000);
+          }, 2000);
+        }, 2000);
       }
     // -->
 </script>
index 51fc81ab753d844ae020cda455220d1619a21cbd..f14c3ca8456b3b252574027f3b4ba53ad501ff85 100644 (file)
@@ -23,11 +23,17 @@ ArvadosWorkbench::Application.configure do
     ks.each do |kk|
       cfg = cfg.send(kk)
     end
-    if cfg.respond_to?(k.to_sym) and !cfg.send(k).nil?
-      # Config must have been set already in environments/*.rb.
+    if v.nil? and cfg.respond_to?(k) and !cfg.send(k).nil?
+      # Config is nil in *.yml, but has been set already in
+      # environments/*.rb (or has a Rails default). Don't overwrite
+      # the default/upstream config with nil.
       #
       # After config files have been migrated, this mechanism should
-      # be deprecated, then removed.
+      # be removed.
+      Rails.logger.warn <<EOS
+DEPRECATED: Inheriting config.#{ks.join '.'} from Rails config.
+            Please move this config into config/application.yml.
+EOS
     elsif v.nil?
       # Config variables are not allowed to be nil. Make a "naughty"
       # list, and present it below.
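Under the new logic, a key left nil in application.yml no longer clobbers a
value set in environments/*.rb; the Rails value is inherited and a deprecation
warning is logged. An illustrative scenario (the key name arvados_docsite is
real, but the setup shown here is hypothetical):

  # config/application.yml
  common:
    arvados_docsite:   # nil: keeps the value from environments/*.rb

  # Approximate log output:
  # DEPRECATED: Inheriting config.arvados_docsite from Rails config.
  #             Please move this config into config/application.yml.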
index 22ff7d4b2bfc60c35309eb6c654a087de5c0a3f8..3504d958a3c11ddf6b2dd495426efdd63b3c945e 100644 (file)
@@ -402,4 +402,20 @@ class ApplicationControllerTest < ActionController::TestCase
       assert_equal([['.', 'foo', 3]], assigns(:object).files)
     end
   end
+
+  test 'Edit name and verify that a duplicate is not created' do
+    @controller = ProjectsController.new
+    project = api_fixture("groups")["aproject"]
+    post :update, {
+      id: project["uuid"],
+      project: {
+        name: 'test name'
+      },
+      format: :json
+    }, session_for(:active)
+    assert_includes @response.body, 'test name'
+    updated = assigns(:object)
+    assert_equal updated.uuid, project["uuid"]
+    assert_equal 'test name', updated.name
+  end
 end
index b99ab95500202cc093f80830d1b8cb165f2ed51a..13644e00bdce28db3460aa2f722f679deb107c7e 100644 (file)
@@ -461,4 +461,57 @@ class CollectionsControllerTest < ActionController::TestCase
 
     assert_equal files.sort, disabled.sort, "Expected to see all collection files in disabled list of files"
   end
+
+  test "anonymous user accesses collection in shared project" do
+    Rails.configuration.anonymous_user_token =
+      api_fixture('api_client_authorizations')['anonymous']['api_token']
+    collection = api_fixture('collections')['public_text_file']
+    get(:show, {id: collection['uuid']})
+
+    response_object = assigns(:object)
+    assert_equal collection['name'], response_object['name']
+    assert_equal collection['uuid'], response_object['uuid']
+    assert_includes @response.body, 'Hello world'
+    assert_includes @response.body, 'Content address'
+    refute_nil css_select('[href="#Advanced"]')
+  end
+
+  test "can view empty collection" do
+    get :show, {id: 'd41d8cd98f00b204e9800998ecf8427e+0'}, session_for(:active)
+    assert_includes @response.body, 'The following collections have this content'
+  end
+
+  test "collection portable data hash redirect" do
+    di = api_fixture('collections')['docker_image']
+    get :show, {id: di['portable_data_hash']}, session_for(:active)
+    assert_match /\/collections\/#{di['uuid']}/, @response.redirect_url
+  end
+
+  test "collection portable data hash with multiple matches" do
+    pdh = api_fixture('collections')['foo_file']['portable_data_hash']
+    get :show, {id: pdh}, session_for(:admin)
+    matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
+    assert matches.size > 1
+
+    matches.each do |k,v|
+      assert_match /href="\/collections\/#{v['uuid']}">.*#{v['name']}<\/a>/, @response.body
+    end
+
+    assert_includes @response.body, 'The following collections have this content:'
+    assert_not_includes @response.body, 'more results are not shown'
+    assert_not_includes @response.body, 'Activity'
+    assert_not_includes @response.body, 'Sharing and permissions'
+  end
+
+  test "collection page renders name" do
+    collection = api_fixture('collections')['foo_file']
+    get :show, {id: collection['uuid']}, session_for(:active)
+    assert_includes @response.body, collection['name']
+    assert_match /href="#{collection['uuid']}\/foo" ><\/i> foo</, @response.body
+  end
+
+  test "No Upload tab on non-writable collection" do
+    get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
+    assert_not_includes @response.body, '<a href="#Upload"'
+  end
 end
index 609e58c1c2dd72a863dd14037ce3a1eb441d30a7..1e2643526de415df8ac20eb4cad9a510f8b90732 100644 (file)
@@ -1,4 +1,8 @@
 require 'test_helper'
 
 class JobsControllerTest < ActionController::TestCase
+  test "visit jobs index page" do
+    get :index, {}, session_for(:active)
+    assert_response :success
+  end
 end
index 62a93a1308b3755ac64e0f6c49b348b5354957f2..13979df68f7ebfcaf2a466b6fa2d6fb7249be47b 100644 (file)
@@ -306,4 +306,95 @@ class ProjectsControllerTest < ActionController::TestCase
     assert_match /\/users\/welcome/, @response.redirect_url
     assert_empty css_select('[href="/projects/public"]')
   end
+
+  test "find a project and edit its description" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+    found.description = 'test description update'
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+    assert_includes @response.body, 'test description update'
+  end
+
+  test "find a project and edit description to textile description" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+    found.description = '*test bold description for textile formatting*'
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+    assert_includes @response.body, '<strong>test bold description for textile formatting</strong>'
+  end
+
+  test "find a project and edit description to html description" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+    found.description = 'Textile description with link to home page <a href="/">take me home</a>.'
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+    assert_includes @response.body, 'Textile description with link to home page <a href="/">take me home</a>.'
+  end
+
+  test "find a project and edit description to textile description with link to object" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+
+    # uses 'Link to object' as a hyperlink for the object
+    found.description = '"Link to object":' + api_fixture('groups')['asubproject']['uuid']
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+
+    # check that input was converted to textile, not staying as inputted
+    refute_includes @response.body, '"Link to object"'
+    refute_empty css_select('[href="/groups/zzzzz-j7d0g-axqo7eu9pwvna1x"]')
+  end
+
+  test "project viewer can't see project sharing tab" do
+    project = api_fixture('groups')['aproject']
+    get(:show, {id: project['uuid']}, session_for(:project_viewer))
+    refute_includes @response.body, '<div id="Sharing"'
+    assert_includes @response.body, '<div id="Data_collections"'
+  end
+
+  [
+    'admin',
+    'active',
+  ].each do |username|
+    test "#{username} can see project sharing tab" do
+     project = api_fixture('groups')['aproject']
+     get(:show, {id: project['uuid']}, session_for(username))
+     assert_includes @response.body, '<div id="Sharing"'
+     assert_includes @response.body, '<div id="Data_collections"'
+    end
+  end
+
+  [
+    ['admin',true],
+    ['active',true],
+    ['project_viewer',false],
+  ].each do |user, can_move|
+    test "#{user} can move subproject from project #{can_move}" do
+      get(:show, {id: api_fixture('groups')['aproject']['uuid']}, session_for(user))
+      if can_move
+        assert_includes @response.body, 'Move project...'
+      else
+        refute_includes @response.body, 'Move project...'
+      end
+    end
+  end
+
+  [
+    ["jobs", "/jobs"],
+    ["pipelines", "/pipeline_instances"],
+    ["collections", "/collections"],
+  ].each do |target,path|
+    test "test dashboard button all #{target}" do
+      get :index, {}, session_for(:active)
+      assert_includes @response.body, "href=\"#{path}\""
+      assert_includes @response.body, "All #{target}"
+    end
+  end
 end
index c1436da4545e93197c95d2b850614cf55c95cafc..6b9cd6efa647edd391d78ff79a6e8bbd429348c9 100644 (file)
@@ -1,6 +1,7 @@
 require 'test_helper'
 
 class UsersControllerTest < ActionController::TestCase
+
   test "valid token works in controller test" do
     get :index, {}, session_for(:active)
     assert_response :success
@@ -74,4 +75,48 @@ class UsersControllerTest < ActionController::TestCase
     end
     assert_equal 1, found_email, "Expected 1 email after requesting shell access"
   end
+
+  [
+    'admin',
+    'active',
+  ].each do |username|
+    test "access users page as #{username} and verify show button is available" do
+      admin_user = api_fixture('users','admin')
+      active_user = api_fixture('users','active')
+      get :index, {}, session_for(username)
+      if username == 'admin'
+        assert_match /<a href="\/projects\/#{admin_user['uuid']}">Home<\/a>/, @response.body
+        assert_match /<a href="\/projects\/#{active_user['uuid']}">Home<\/a>/, @response.body
+        assert_match /href="\/users\/#{admin_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_match /href="\/users\/#{active_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_includes @response.body, admin_user['email']
+        assert_includes @response.body, active_user['email']
+      else
+        refute_match  /Home<\/a>/, @response.body
+        refute_match /href="\/users\/#{admin_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_match /href="\/users\/#{active_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_includes @response.body, active_user['email']
+      end
+    end
+  end
+
+  [
+    'admin',
+    'active',
+  ].each do |username|
+    test "access settings drop down menu as #{username}" do
+      admin_user = api_fixture('users','admin')
+      active_user = api_fixture('users','active')
+      get :show, {
+        id: api_fixture('users')[username]['uuid']
+      }, session_for(username)
+      if username == 'admin'
+        assert_includes @response.body, admin_user['email']
+        refute_empty css_select('[id="system-menu"]')
+      else
+        assert_includes @response.body, active_user['email']
+        assert_empty css_select('[id="system-menu"]')
+      end
+    end
+  end
 end
index 463dacc02226bf104eaa91bbe664cf2bc386ce6a..9d411147aef1c9dee5d98c5e965e9869747ca439 100644 (file)
@@ -27,6 +27,7 @@ class CollectionsHelperTest < ActionView::TestCase
     ["filename.yml", true],
 
     ["filename.bam", false],
+    ["filename.tar", false],
     ["filename", false],
   ].each do |file_name, preview_allowed|
     test "verify '#{file_name}' is allowed for preview #{preview_allowed}" do
index 8ac3a56cb42abd1983e15d621541c3cb158df85d..7d3dfabd6448ee56ce7d688c7a1f3dc9b7cacabd 100644 (file)
@@ -310,14 +310,4 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
       end
     end
   end
-
-  test "anonymous user accesses collection in shared project" do
-    visit "/collections/#{api_fixture('collections')['public_text_file']['uuid']}"
-
-    # in collection page
-    assert_text 'Public Projects Unrestricted public data'
-    assert_text 'Hello world'
-    assert_text 'Content address'
-    assert_selector 'a', text: 'Provenance graph'
-  end
 end
index daf2b09bbb1717b50bd472a54cda8e13a0e63db1..31a297f0c14c3dd4215499e8d202d294ca980ff5 100644 (file)
@@ -201,4 +201,43 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
       assert page.has_link?('Report a problem ...'), 'No link - Report a problem'
     end
   end
+
+  test "no SSH public key notification when shell_in_a_box_url is configured" do
+    Rails.configuration.shell_in_a_box_url = 'example.com'
+    visit page_with_token('job_reader')
+    click_link 'notifications-menu'
+    assert_no_selector 'a', text:'Click here to set up an SSH public key for use with Arvados.'
+    assert_selector 'a', text:'Click here to learn how to run an Arvados Crunch pipeline'
+  end
+
+  [
+    ['Repositories','repository','Attributes'],
+    ['Virtual machines','virtual machine','current_user_logins'],
+    ['SSH keys','authorized key','public_key'],
+    ['Links','link','link_class'],
+    ['Groups','group','group_class'],
+    ['Compute nodes','node','info[ping_secret'],
+    ['Keep services','keep service','service_ssl_flag'],
+    ['Keep disks', 'keep disk','bytes_free'],
+  ].each do |page_name, add_button_text, look_for|
+    test "test system menu #{page_name} link" do
+      skip 'Skip repositories test until #6652 is fixed.' if page_name == 'Repositories'
+
+      visit page_with_token('admin')
+      within('.navbar-fixed-top') do
+        page.find("#system-menu").click
+        within('.dropdown-menu') do
+          assert_selector 'a', text: page_name
+          find('a', text: page_name).click
+        end
+      end
+
+      # click the add button
+      assert_selector 'button', text: "Add a new #{add_button_text}"
+      find('button', text: "Add a new #{add_button_text}").click
+
+      # look for unique property in the created object page
+      assert page.has_text? look_for
+    end
+  end
 end
index 6960d3bc899d47bdfe929719f88dd24615db3f98..62efee4d67e6b4e5a84e2340bcc55902b18ba30d 100644 (file)
@@ -28,13 +28,6 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
     assert_selector 'div#Upload.active div.panel'
   end
 
-  test "No Upload tab on non-writable collection" do
-    need_javascript
-    visit(page_with_token 'active',
-          '/collections/'+api_fixture('collections')['user_agreement']['uuid'])
-    assert_no_selector '.nav-tabs Upload'
-  end
-
   test "Upload two empty files with the same name" do
     need_selenium "to make file uploads work"
     visit page_with_token 'active', sandbox_path
index 2eee6fe8dbade6c7272410cbc42443e9b8cb5b08..4f66e9d6b58bf4e9402a95ab21192268fa9edd1a 100644 (file)
@@ -20,17 +20,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     assert_text "Copy of #{collection_name}"
   end
 
-  test "Collection page renders name" do
-    Capybara.current_driver = :rack_test
-    uuid = api_fixture('collections')['foo_file']['uuid']
-    coll_name = api_fixture('collections')['foo_file']['name']
-    visit page_with_token('active', "/collections/#{uuid}")
-    assert(page.has_text?(coll_name), "Collection page did not include name")
-    # Now check that the page is otherwise normal, and the collection name
-    # isn't only showing up in an error message.
-    assert(page.has_link?('foo'), "Collection page did not include file link")
-  end
-
   def check_sharing(want_state, link_regexp)
     # We specifically want to click buttons.  See #4291.
     if want_state == :off
@@ -91,13 +80,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "can view empty collection" do
-    Capybara.current_driver = :rack_test
-    uuid = 'd41d8cd98f00b204e9800998ecf8427e+0'
-    visit page_with_token('active', "/collections/#{uuid}")
-    assert page.has_text?(/This collection is empty|The following collections have this content/)
-  end
-
   test "combine selected collections into new collection" do
     foo_collection = api_fixture('collections')['foo_file']
     bar_collection = api_fixture('collections')['bar_file']
@@ -191,33 +173,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     assert(page.has_text?('file2_in_subdir4.txt'), 'file not found - file1_in_subdir4.txt')
   end
 
-  test "Collection portable data hash redirect" do
-    di = api_fixture('collections')['docker_image']
-    visit page_with_token('active', "/collections/#{di['portable_data_hash']}")
-
-    # check redirection
-    assert current_path.end_with?("/collections/#{di['uuid']}")
-    assert page.has_text?("docker_image")
-    assert page.has_text?("Activity")
-    assert page.has_text?("Sharing and permissions")
-  end
-
-  test "Collection portable data hash with multiple matches" do
-    pdh = api_fixture('collections')['foo_file']['portable_data_hash']
-    visit page_with_token('admin', "/collections/#{pdh}")
-
-    matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
-    assert matches.size > 1
-
-    matches.each do |k,v|
-      assert page.has_link?(v["name"]), "Page /collections/#{pdh} should contain link '#{v['name']}'"
-    end
-    assert_text 'The following collections have this content:'
-    assert_no_text 'more results are not shown'
-    assert_no_text 'Activity'
-    assert_no_text 'Sharing and permissions'
-  end
-
   test "Collection portable data hash with multiple matches with more than one page of results" do
     pdh = api_fixture('collections')['baz_file']['portable_data_hash']
     visit page_with_token('admin', "/collections/#{pdh}")
index da8f439dfe2df0a310d6bc7837968033bae49f29..b6bf700d0948a1eb85a8ed00fe0c30fc6ae6e037 100644 (file)
@@ -469,7 +469,7 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
       page_text = page.text
 
       if run_time
-        match = /This pipeline started at (.*)\. It failed after (.*) seconds at (.*)\. Check the Log/.match page_text
+        match = /This pipeline started at (.*)\. It failed after (.*) at (.*)\. Check the Log/.match page_text
       else
         match = /This pipeline started at (.*). It has been active for(.*)/.match page_text
       end
index 9c2842f1c2c6d5ba62d07413d28ebecb415cf054..dc54b8d1c14e892e9640120999340279c8cf4e9c 100644 (file)
@@ -36,109 +36,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
            "Description update did not survive page refresh")
   end
 
-  test 'Find a project and edit description to textile description' do
-    visit page_with_token 'active', '/'
-    find("#projects-menu").click
-    find(".dropdown-menu a", text: "A Project").click
-    within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
-      find('span', text: api_fixture('groups')['aproject']['name']).click
-      within('.arv-description-as-subtitle') do
-        find('.fa-pencil').click
-        find('.editable-input textarea').set('<p>*Textile description for A project* - "take me home":/ </p><p>And a new paragraph in description.</p>')
-        find('.editable-submit').click
-      end
-      wait_for_ajax
-    end
-
-    # visit project page
-    visit current_path
-    assert_no_text '*Textile description for A project*'
-    assert(find?('.container-fluid', text: 'Textile description for A project'),
-           "Description update did not survive page refresh")
-    assert(find?('.container-fluid', text: 'And a new paragraph in description'),
-           "Description did not contain the expected new paragraph")
-    assert(page.has_link?("take me home"), "link not found in description")
-
-    click_link 'take me home'
-
-    # now in dashboard
-    assert(page.has_text?('Active pipelines'), 'Active pipelines - not found on dashboard')
-  end
-
-  test 'Find a project and edit description to html description' do
-    visit page_with_token 'active', '/'
-    find("#projects-menu").click
-    find(".dropdown-menu a", text: "A Project").click
-    within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
-      find('span', text: api_fixture('groups')['aproject']['name']).click
-      within('.arv-description-as-subtitle') do
-        find('.fa-pencil').click
-        find('.editable-input textarea').set('<br>Textile description for A project</br> - <a href="/">take me home</a>')
-        find('.editable-submit').click
-      end
-      wait_for_ajax
-    end
-    visit current_path
-    assert(find?('.container-fluid', text: 'Textile description for A project'),
-           "Description update did not survive page refresh")
-    assert(!find?('.container-fluid', text: '<br>Textile description for A project</br>'),
-           "Textile description is displayed with uninterpreted formatting characters")
-    assert(page.has_link?("take me home"),"link not found in description")
-    click_link 'take me home'
-    assert page.has_text?('Active pipelines')
-  end
-
-  test 'Find a project and edit description to textile description with link to object' do
-    visit page_with_token 'active', '/'
-    find("#projects-menu").click
-    find(".dropdown-menu a", text: "A Project").click
-    within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
-      find('span', text: api_fixture('groups')['aproject']['name']).click
-      within('.arv-description-as-subtitle') do
-        find('.fa-pencil').click
-        find('.editable-input textarea').set('*Textile description for A project* - "go to sub-project":' + api_fixture('groups')['asubproject']['uuid'] + "'")
-        find('.editable-submit').click
-      end
-      wait_for_ajax
-    end
-    visit current_path
-    assert(find?('.container-fluid', text: 'Textile description for A project'),
-           "Description update did not survive page refresh")
-    assert(!find?('.container-fluid', text: '*Textile description for A project*'),
-           "Textile description is displayed with uninterpreted formatting characters")
-    assert(page.has_link?("go to sub-project"), "link not found in description")
-    click_link 'go to sub-project'
-    assert(page.has_text?(api_fixture('groups')['asubproject']['name']), 'sub-project name not found after clicking link')
-  end
-
-  test 'Add a new name, then edit it, without creating a duplicate' do
-    project_uuid = api_fixture('groups')['aproject']['uuid']
-    specimen_uuid = api_fixture('traits')['owned_by_aproject_with_no_name']['uuid']
-    visit page_with_token 'active', '/projects/' + project_uuid
-    click_link 'Other objects'
-    within '.selection-action-container' do
-      # Wait for the tab to load:
-      assert_selector 'tr[data-kind="arvados#trait"]'
-      within first('tr', text: 'Trait') do
-        find(".fa-pencil").click
-        find('.editable-input input').set('Now I have a name.')
-        find('.glyphicon-ok').click
-        assert_selector '.editable', text: 'Now I have a name.'
-        find(".fa-pencil").click
-        find('.editable-input input').set('Now I have a new name.')
-        find('.glyphicon-ok').click
-      end
-      wait_for_ajax
-      assert_selector '.editable', text: 'Now I have a new name.'
-    end
-    visit current_path
-    click_link 'Other objects'
-    within '.selection-action-container' do
-      find '.editable', text: 'Now I have a new name.'
-      assert_no_selector '.editable', text: 'Now I have a name.'
-    end
-  end
-
   test 'Create a project and move it into a different project' do
     visit page_with_token 'active', '/projects'
     find("#projects-menu").click
@@ -202,12 +99,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
                     text: group_name("anonymous_group"))
   end
 
-  test "project viewer can't see project sharing tab" do
-    show_object_using('project_viewer', 'groups', 'aproject', 'A Project')
-    assert(page.has_no_link?("Sharing"),
-           "read-only project user sees sharing tab")
-  end
-
   test "project owner can manage sharing for another user" do
     add_user = api_fixture('users')['future_project_user']
     new_name = ["first_name", "last_name"].map { |k| add_user[k] }.join(" ")
@@ -517,18 +408,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  [
-    ["jobs", "/jobs"],
-    ["pipelines", "/pipeline_instances"],
-    ["collections", "/collections"]
-  ].each do |target,path|
-    test "Test dashboard button all #{target}" do
-      visit page_with_token 'active', '/'
-      click_link "All #{target}"
-      assert_equal path, current_path
-    end
-  end
-
   def scroll_setup(project_name,
                    total_nbr_items,
                    item_list_parameter,
@@ -651,26 +530,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  # Move button accessibility
-  [
-    ['admin', true],
-    ['active', true],  # project owner
-    ['project_viewer', false],
-    ].each do |user, can_move|
-    test "#{user} can move subproject under another user's Home #{can_move}" do
-      project = api_fixture('groups')['aproject']
-      collection = api_fixture('collections')['collection_to_move_around_in_aproject']
-
-      # verify the project move button
-      visit page_with_token user, "/projects/#{project['uuid']}"
-      if can_move
-        assert page.has_link? 'Move project...'
-      else
-        assert page.has_no_link? 'Move project...'
-      end
-    end
-  end
-
   test "error while loading tab" do
     original_arvados_v1_base = Rails.configuration.arvados_v1_base
 
index e50907bf8b3d8a67c6f307faab011b19aa4210c7..1b80daf03ce49905e8ed4097496544c958a432bf 100644 (file)
@@ -42,13 +42,13 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
 
         page.find_field('public_key').set 'first test with an incorrect ssh key value'
         click_button 'Submit'
-        assert page.has_text?('Public key does not appear to be a valid ssh-rsa or dsa public key'), 'No text - Public key does not appear to be a valid'
+        assert_text 'Public key does not appear to be a valid ssh-rsa or dsa public key'
 
         public_key_str = api_fixture('authorized_keys')['active']['public_key']
         page.find_field('public_key').set public_key_str
         page.find_field('name').set 'added_in_test'
         click_button 'Submit'
-        assert page.has_text?('Public key already exists in the database, use a different key.'), 'No text - Public key already exists'
+        assert_text 'Public key already exists in the database, use a different key.'
 
         new_key = SSHKey.generate
         page.find_field('public_key').set new_key.ssh_public_key
@@ -57,7 +57,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
       end
 
       # key must be added. look for it in the refreshed page
-      assert page.has_text?('added_in_test'), 'No text - added_in_test'
+      assert_text 'added_in_test'
   end
 
   [
@@ -180,5 +180,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
       click_on "Create"
     end
     assert_text ":active/workbenchtest.git"
+    assert_match /git@git.*:active\/workbenchtest.git/, page.text
+    assert_match /https:\/\/git.*\/active\/workbenchtest.git/, page.text
   end
 end
index db670815d83ca1ca86816c859191dbce9dd52660..1ae302c23947c2968d194fdb006bcdaf3561be04 100644 (file)
@@ -197,44 +197,4 @@ class UsersTest < ActionDispatch::IntegrationTest
     click_link 'Metadata'
     assert page.has_text? 'VirtualMachine: testvm.shell'
   end
-
-  [
-    'admin',
-    'active',
-  ].each do |username|
-    test "login as #{username} and access show button" do
-      need_javascript
-
-      user = api_fixture('users', username)
-
-      visit page_with_token(username, '/users')
-
-      within('tr', text: user['uuid']) do
-        assert_text user['email']
-        if username == 'admin'
-          assert_selector 'a', text: 'Home'
-        else
-          assert_no_selector 'a', text: 'Home'
-        end
-        assert_selector 'a', text: 'Show'
-        find('a', text: 'Show').click
-      end
-      assert_selector 'a', text: 'Attributes'
-    end
-  end
-
-  test "admin user can access another user page" do
-    need_javascript
-
-    visit page_with_token('admin', '/users')
-
-    active_user = api_fixture('users', 'active')
-    within('tr', text: active_user['uuid']) do
-      assert_text active_user['email']
-      assert_selector "a[href=\"/projects/#{active_user['uuid']}\"]", text: 'Home'
-      assert_selector 'a', text: 'Show'
-      find('a', text: 'Show').click
-    end
-    assert_selector 'a', text:'Attributes'
-  end
 end
index c24f0a6a4c76f877f5b948c631e0138dfc39c87d..ba071d5b3c0b928efd6c45484d83a581e31d214d 100644 (file)
@@ -32,6 +32,7 @@ navbar:
       - user/getting_started/workbench.html.textile.liquid
       - user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
     - Access an Arvados virtual machine:
+      - user/getting_started/vm-login-with-webshell.html.textile.liquid
       - user/getting_started/ssh-access-unix.html.textile.liquid
       - user/getting_started/ssh-access-windows.html.textile.liquid
       - user/getting_started/check-environment.html.textile.liquid
@@ -41,6 +42,7 @@ navbar:
       - user/tutorials/tutorial-keep-get.html.textile.liquid
       - user/tutorials/tutorial-keep-mount.html.textile.liquid
       - user/topics/keep.html.textile.liquid
+      - user/topics/arv-copy.html.textile.liquid
     - Run a pipeline on the command line:
       - user/topics/running-pipeline-command-line.html.textile.liquid
       - user/topics/arv-run.html.textile.liquid
@@ -141,21 +143,19 @@ navbar:
   installguide:
     - Overview:
       - install/index.html.textile.liquid
-    - Docker:
+    - Docker-based installation:
       - install/pre-built-docker.html.textile.liquid
       - install/install-docker.html.textile.liquid
     - Manual installation:
       - install/install-manual-prerequisites.html.textile.liquid
+      - install/install-sso.html.textile.liquid
       - install/install-api-server.html.textile.liquid
-      - install/install-workbench-app.html.textile.liquid
-      - install/install-shell-server.html.textile.liquid
-      - install/create-standard-objects.html.textile.liquid
+      - install/install-arv-git-httpd.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
-      - install/install-arv-git-httpd.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
       - install/install-compute-node.html.textile.liquid
+      - install/install-shell-server.html.textile.liquid
+      - install/create-standard-objects.html.textile.liquid
+      - install/install-workbench-app.html.textile.liquid
       - install/cheat_sheet.html.textile.liquid
-    - Software prerequisites:
-      - install/install-manual-prerequisites-ruby.html.textile.liquid
-      - install/install-sso.html.textile.liquid
diff --git a/doc/_includes/_arv_copy_expectations.liquid b/doc/_includes/_arv_copy_expectations.liquid
new file mode 100644 (file)
index 0000000..a76c9e7
--- /dev/null
@@ -0,0 +1,6 @@
+{% include 'notebox_begin' %}
+As stated above, arv-copy is recursive by default and requires a working git repository in the destination cluster. If you do not have a repository created, you can follow the "Adding a new repository":{{site.baseurl}}/user/tutorials/add-new-repository.html page. We will use the *tutorial* repository created on that page as the example.
+
+<br/>In addition, arv-copy requires git when copying to a git repository. Please make sure that git is installed and available.
+
+{% include 'notebox_end' %}
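+
+For reference, a typical arv-copy invocation looks like this (the cluster prefixes and collection UUID below are placeholders; see the "arv-copy topic page":{{site.baseurl}}/user/topics/arv-copy.html for full usage):
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src zzzzz --dst qqqqq zzzzz-4zz18-xxxxxxxxxxxxxxx</span>
+</code></pre>
+</notextile>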
diff --git a/doc/_includes/_install_git_curl.liquid b/doc/_includes/_install_git_curl.liquid
new file mode 100644 (file)
index 0000000..edf07a7
--- /dev/null
@@ -0,0 +1,13 @@
+On a Debian-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install git curl</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install git curl</span>
+</code></pre>
+</notextile>
diff --git a/doc/_includes/_install_postgres.liquid b/doc/_includes/_install_postgres.liquid
new file mode 100644 (file)
index 0000000..82c2c2d
--- /dev/null
@@ -0,0 +1,22 @@
+On a Debian-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install libpq-dev postgresql</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install postgresql postgresql-devel</span>
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+
+If you intend to use specific versions of these packages from Software Collections, you may have to adapt some of the package names to match. For example:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install postgresql92 postgresql92-postgresql-devel</span></code></pre></notextile>
+
+{% include 'notebox_end' %}
diff --git a/doc/_includes/_install_ruby_and_bundler.liquid b/doc/_includes/_install_ruby_and_bundler.liquid
new file mode 100644 (file)
index 0000000..369bf46
--- /dev/null
@@ -0,0 +1,64 @@
+Currently, only Ruby 2.1 is supported.
+
+h4(#rvm). *Option 1: Install with RVM*
+
+<notextile>
+<pre><code><span class="userinput">sudo gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
+\curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.1
+sudo adduser "$USER" rvm
+</span></code></pre></notextile>
+
+Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
+
+<notextile>
+<pre><code><span class="userinput">source /usr/local/rvm/scripts/rvm
+</span></code></pre></notextile>
+
+Once RVM is activated in your shell, install Bundler:
+
+<notextile>
+<pre><code>~$ <span class="userinput">gem install bundler</span>
+</code></pre></notextile>
+
+h4(#fromsource). *Option 2: Install from source*
+
+Install prerequisites for Debian 7 or 8:
+
+<notextile>
+<pre><code><span class="userinput">sudo apt-get install \
+    bison build-essential gettext libcurl3 libcurl3-gnutls \
+    libcurl4-openssl-dev libpcre3-dev libreadline-dev \
+    libssl-dev libxslt1.1 zlib1g-dev
+</span></code></pre></notextile>
+
+Install prerequisites for CentOS 6:
+
+<notextile>
+<pre><code><span class="userinput">sudo yum install \
+    libyaml-devel glibc-headers autoconf gcc-c++ glibc-devel \
+    patch readline-devel zlib-devel libffi-devel openssl-devel \
+    automake libtool bison sqlite-devel
+</span></code></pre></notextile>
+
+Install prerequisites for Ubuntu 12.04 or 14.04:
+
+<notextile>
+<pre><code><span class="userinput">sudo apt-get install \
+    gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
+    libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
+    libncurses5-dev automake libtool bison pkg-config libffi-dev
+</span></code></pre></notextile>
+
+Build and install Ruby:
+
+<notextile>
+<pre><code><span class="userinput">mkdir -p ~/src
+cd ~/src
+curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz | tar xz
+cd ruby-2.1.6
+./configure --no-install-rdoc
+make
+sudo make install
+
+sudo gem install bundler</span>
+</code></pre></notextile>
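+
+To verify the installation, check the installed versions (a quick sanity check; the exact version strings will vary):
+
+<notextile>
+<pre><code>~$ <span class="userinput">ruby -v</span>
+ruby 2.1.6p336 (2015-04-13 revision 50298) [x86_64-linux]
+~$ <span class="userinput">bundler --version</span>
+Bundler version 1.10.6
+</code></pre></notextile>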
index a371d2489a6165e17ffb01d7248559944ba9afd3..333df4452bac965823886cd4608535f20cc2c743 100644 (file)
@@ -1,3 +1,3 @@
 {% include 'notebox_begin' %}
-This tutorial assumes either that you are logged into an Arvados VM instance (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
+This tutorial assumes that you are logged into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
 {% include 'notebox_end' %}
index 51c8f38a49f2411409e1436904b42d74630ee75d..61938447f609b4cb7dbc5b745cdeb108fe479f40 100644 (file)
Binary files a/doc/images/add-new-repository.png and b/doc/images/add-new-repository.png differ
diff --git a/doc/images/added-new-repository.png b/doc/images/added-new-repository.png
new file mode 100644 (file)
index 0000000..7d187aa
Binary files /dev/null and b/doc/images/added-new-repository.png differ
diff --git a/doc/images/api-token-host.png b/doc/images/api-token-host.png
new file mode 100644 (file)
index 0000000..cda04d6
Binary files /dev/null and b/doc/images/api-token-host.png differ
diff --git a/doc/images/vm-access-with-webshell.png b/doc/images/vm-access-with-webshell.png
new file mode 100644 (file)
index 0000000..b980fdc
Binary files /dev/null and b/doc/images/vm-access-with-webshell.png differ
index 77a90e0f521f3c306f9e0e99b426e9be6f4cb128..ad31d96fd1e5b1481208aff2a96ffe6eff7b41ea 100644 (file)
@@ -6,35 +6,34 @@ title: Install the API server
 
 h2. Install prerequisites
 
-The Arvados package repository includes an API server package that can help automate much of the deployment.  It requires:
+The Arvados package repository includes an API server package that can help automate much of the deployment.
 
-* PostgreSQL 9.0+
-* "Ruby 2.1 and bundler":install-manual-prerequisites-ruby.html
-* Build tools and the curl and PostgreSQL development libraries, to build gem dependencies
-* Nginx
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#install_postgres). Install PostgreSQL
+
+{% include 'install_postgres' %}
+
+h3(#build_tools_apiserver). Build tools
 
 On older distributions, you may need to use a backports repository to satisfy these requirements.  For example, on older Red Hat-based systems, consider using the "postgresql92":https://www.softwarecollections.org/en/scls/rhscl/postgresql92/ and "nginx16":https://www.softwarecollections.org/en/scls/rhscl/nginx16/ Software Collections.
 
 On a Debian-based system, install the following packages:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install bison build-essential libpq-dev libcurl4-openssl-dev postgresql git nginx arvados-api-server</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install bison build-essential libcurl4-openssl-dev git nginx arvados-api-server</span>
 </code></pre>
 </notextile>
 
 On a Red Hat-based system, install the following packages:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo yum install bison make automake gcc gcc-c++ libcurl-devel postgresql-server postgresql-devel nginx git arvados-api-server</span>
+<pre><code>~$ <span class="userinput">sudo yum install bison make automake gcc gcc-c++ libcurl-devel nginx git arvados-api-server</span>
 </code></pre>
 </notextile>
 
-{% include 'notebox_begin' %}
-
-If you intend to use specific versions of these packages from Software Collections, you may have to adapt some of the package names to match; e.g., @postgresql92-postgresql-server postgresql92-postgresql-devel nginx16@.
-
-{% include 'notebox_end' %}
-
 h2. Set up the database
 
 Generate a new database password. Nobody ever needs to memorize it or type it, so we'll make a strong one:
@@ -74,8 +73,7 @@ The API server package uses configuration files that you write to @/etc/arvados/
 <pre><code>~$ <span class="userinput">sudo mkdir -p /etc/arvados/api</span>
 ~$ <span class="userinput">sudo chmod 700 /etc/arvados/api</span>
 ~$ <span class="userinput">cd /var/www/arvados-api/current</span>
-/var/www/arvados-api/current$ <span class="userinput">sudo cp config/initializers/omniauth.rb.example /etc/arvados/api/omniauth.rb</span>
-/var/www/arvados-api/current$ <span class="userinput">sudo cp config/database.yml.sample /etc/arvados/api/database.yml</span>
+/var/www/arvados-api/current$ <span class="userinput">sudo cp config/database.yml.example /etc/arvados/api/database.yml</span>
 /var/www/arvados-api/current$ <span class="userinput">sudo cp config/application.yml.example /etc/arvados/api/application.yml</span>
 </code></pre>
 </notextile>
@@ -84,64 +82,119 @@ h2. Configure the database connection
 
 Edit @/etc/arvados/api/database.yml@ and replace the @xxxxxxxx@ database password placeholders with the PostgreSQL password you generated above.
 
-h2. Configure the API server
+h2(#configure_application). Configure the API server
+
+Edit @/etc/arvados/api/application.yml@ to configure the settings described in the following sections.  The deployment script will consistently deploy this to the API server's configuration directory.  The API server reads both @application.yml@ and its own @config/application.default.yml@ file.  The settings in @application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@.  The @config/application.yml.example@ file is not read by the API server and is provided as a starting template only.
 
-Edit @/etc/arvados/api/application.yml@ following the instructions below.  The deployment script will consistently deploy this to the API server's configuration directory.  The API server reads both @application.yml@ and its own @config/application.default.yml@ file.  Values in @application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@.  The @config/application.yml.example@ file is not read by the API server and is provided for installation convenience only.
+@config/application.default.yml@ documents additional configuration settings not listed here.  You can "view the current source version":https://arvados.org/projects/arvados/repository/revisions/master/entry/services/api/config/application.default.yml for reference.
 
-Always put your local configuration in @application.yml@ instead of editing @application.default.yml@.
+Only put local configuration in @application.yml@.  Do not edit @application.default.yml@.
 
 h3(#uuid_prefix). uuid_prefix
 
-Define your @uuid_prefix@ in @application.yml@ by setting the @uuid_prefix@ field in the section for your environment.  This prefix is used for all database identifiers to identify the record as originating from this site.  It must be exactly 5 alphanumeric characters (lowercase ASCII letters and digits).
+Define your @uuid_prefix@ in @application.yml@ by setting the @uuid_prefix@ field in the section for your environment.  This prefix is used for all database identifiers to identify the record as originating from this site.  It must be exactly 5 lowercase ASCII letters and digits.
 
-h3(#git_repositories_dir). git_repositories_dir
+Example @application.yml@:
+
+<notextile>
+<pre><code>  uuid_prefix: <span class="userinput">zzzzz</span></code></pre>
+</notextile>
 
-This field defaults to @/var/lib/arvados/git@. You can override the value by defining it in @application.yml@.
+h3. secret_token
 
-Make sure a clone of the arvados repository exists in @git_repositories_dir@.
+The @secret_token@ is used for signing cookies.  IMPORTANT: This is a site secret. It should be at least 50 characters.  Generate a random value and set it in @application.yml@:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git</span>
-~$ <span class="userinput">sudo git clone --bare git://git.curoverse.com/arvados.git /var/lib/arvados/git/arvados.git</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 </code></pre></notextile>
 
-h3. secret_token
+Example @application.yml@:
+
+<notextile>
+<pre><code>  secret_token: <span class="userinput">yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy</span></code></pre>
+</notextile>
+
+h3(#blob_signing_key). blob_signing_key
 
-Generate a new secret token for signing cookies:
+The @blob_signing_key@ is used to enforce access control to Keep blocks.  This same key must be provided to the Keepstore daemons when "installing Keepstore servers.":install-keepstore.html  IMPORTANT: This is a site secret. It should be at least 50 characters.  Generate a random value and set it in @application.yml@:
 
 <notextile>
 <pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 </code></pre></notextile>
 
-Then put that value in the @secret_token@ field.
+Example @application.yml@:
 
-h3. blob_signing_key
+<notextile>
+<pre><code>  blob_signing_key: <span class="userinput">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</span></code></pre>
+</notextile>
+
+h3(#omniauth). sso_app_secret, sso_app_id, sso_provider_url
+
+The following settings enable the API server to communicate with the "Single Sign On (SSO) server":install-sso.html to authenticate user logins.
 
-If you want access control on your "Keepstore":install-keepstore.html server(s), you should set @blob_signing_key@ to the same value as the permission key you provide to your Keepstore daemon(s).
+Set @sso_provider_url@ to the base URL where your SSO server is installed.  This should be a URL consisting of the scheme and host (and optionally, port), without a trailing slash.
+
+Set @sso_app_secret@ and @sso_app_id@ to the corresponding values for @app_secret@ and @app_id@ used in the "Create arvados-server client for Single Sign On (SSO)":install-sso.html#client step.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code>  sso_app_id: <span class="userinput">arvados-server</span>
+  sso_app_secret: <span class="userinput">wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww</span>
+  sso_provider_url: <span class="userinput">https://sso.example.com</span>
+</code></pre>
+</notextile>
 
 h3. workbench_address
 
-Fill in the url of your workbench application in @workbench_address@, for example
+Set @workbench_address@ to the URL of your workbench application after following "Install Workbench.":install-workbench-app.html
+
+Example @application.yml@:
 
-&nbsp;&nbsp;https://workbench.@prefix_uuid@.your.domain
+<notextile>
+<pre><code>  workbench_address: <span class="userinput">https://workbench.zzzzz.example.com</span></code></pre>
+</notextile>
 
-h3(#omniauth). sso_app_id, sso_app_secret, sso_provider_url
+h3. websockets_address
 
-For @sso_app_id@ and @sso_app_secret@, provide the same @app_id@ and @app_secret@ used in the "Create arvados-server client for Single Sign On (SSO)":install-sso.html#client step.
+Set @websockets_address@ to the @wss://@ URL of the API server websocket endpoint after following "Set up Web servers.":#set_up
 
-For @sso_provider_url@, provide the base URL where your SSO server is installed: just the scheme and host, with no trailing slash.
+Example @application.yml@:
 
 <notextile>
-<pre><code>  sso_app_id: arvados-server
-  sso_app_secret: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
-  sso_provider_url: https://sso.example.com
+<pre><code>  websockets_address: <span class="userinput">wss://ws.zzzzz.example.com</span></code></pre>
+</notextile>
+
+h3(#git_repositories_dir). git_repositories_dir
+
+The @git_repositories_dir@ setting specifies the directory where user git repositories will be stored.  By default this is @/var/lib/arvados/git@.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code>  git_repositories_dir: <span class="userinput">/var/lib/arvados/git</span>
 </code></pre>
 </notextile>
 
-h3. Other options
+Make sure a clone of the arvados repository exists in @git_repositories_dir@.
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git</span>
+~$ <span class="userinput">sudo git clone --bare git://git.curoverse.com/arvados.git /var/lib/arvados/git/arvados.git</span>
+</code></pre></notextile>
 
-Consult @/var/www/arvados-api/current/config/application.default.yml@ for a full list of configuration options. (But don't edit it. Edit @application.yml@ instead.)
+h3(#git_internal_dir). git_internal_dir
+
+The @git_internal_dir@ setting specifies the location of Arvados' internal git repository.  By default this is @/var/lib/arvados/internal.git@.  This repository stores git commits that have been used to run Crunch jobs.  It should _not_ be a subdirectory of @git_repositories_dir@.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code>  git_internal_dir: <span class="userinput">/var/lib/arvados/internal.git</span>
+</code></pre>
+</notextile>
 
 h2. Prepare the API server deployment
 
@@ -155,7 +208,7 @@ You can safely ignore the following error message you may see when loading the d
 
 This command aborts when it encounters an error.  It's safe to rerun multiple times, so if there's a problem with your configuration, you can fix that and try again.
 
-h2. Set up Web servers
+h2(#set_up). Set up Web servers
 
 For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend for the main API server and a Puma backend for API server Websockets.  To do that:
 
@@ -170,18 +223,21 @@ For best performance, we recommend you use Nginx as your Web server front-end, w
 <pre><code>#!/bin/bash
 
 set -e
+exec 2>&1
+
 # Uncomment the line below if you're using RVM.
 #source /etc/profile.d/rvm.sh
 
-envdir="/etc/sv/puma/env"
-root=/etc/sv/puma
-echo "Starting puma from ${root}"
-cd $root
-mkdir -p "${envdir}"
-exec 2>&1
+envdir="`pwd`/env"
+mkdir -p "$envdir"
+echo ws-only > "$envdir/ARVADOS_WEBSOCKETS"
+
 cd /var/www/arvados-api/current
+echo "Starting puma in `pwd`"
+
 # You may need to change arguments below to match your deployment, especially -u.
-exec chpst -e "${envdir}" -m 1073741824 -u www-data:www-data bundle exec puma -t 0:512 -e production -b tcp://127.0.0.1:8100
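+# chpst (from runit) limits memory to 1 GiB (-m, in bytes), runs as the
+# www-data user and group (-u), and loads environment variables from the
+# files in $envdir (-e).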
+exec chpst -m 1073741824 -u www-data:www-data -e "$envdir" \
+  bundle exec puma -t 0:512 -e production -b tcp://127.0.0.1:8100
 </code></pre>
 </li>
 
@@ -212,7 +268,7 @@ proxy_http_version 1.1;
 
 server {
   listen       <span class="userinput">[your public IP address]</span>:443 ssl;
-  server_name  <span class="userinput">uuid-prefix.your.domain</span>;
+  server_name  <span class="userinput">uuid_prefix.your.domain</span>;
 
   ssl on;
 
@@ -232,7 +288,7 @@ server {
 
 server {
   listen       <span class="userinput">[your public IP address]</span>:443 ssl;
-  server_name  ws.<span class="userinput">uuid-prefix.your.domain</span>;
+  server_name  ws.<span class="userinput">uuid_prefix.your.domain</span>;
 
   ssl on;
 
index 44423e183fc4f7cf647ca0c37bc6dc7f23a31d51..33b112489e576195497e0b8c0c84d9e0fb03e51a 100644 (file)
@@ -4,7 +4,7 @@ navsection: installguide
 title: Install Git server
 ...
 
-The arv-git-httpd server provides HTTP access to hosted git repositories, using Arvados authentication tokens instead of passwords. It is intended to be installed on the system where your git repositories are stored, and accessed through a web proxy that provides SSL support.
+The arvados-git-httpd server provides HTTP access to hosted git repositories, using Arvados authentication tokens instead of passwords. It is intended to be installed on the system where your git repositories are stored, and accessed through a web proxy that provides SSL support.
 
 By convention, we use the following hostname for the git service:
 
@@ -15,26 +15,26 @@ table(table table-bordered table-condensed).
 
 This hostname should resolve from anywhere on the internet.
 
-h2. Install arv-git-httpd
+h2. Install arvados-git-httpd
 
 On Debian-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install git arv-git-httpd</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install git arvados-git-httpd</span>
 </code></pre>
 </notextile>
 
 On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo yum install git arv-git-httpd</span>
+<pre><code>~$ <span class="userinput">sudo yum install git arvados-git-httpd</span>
 </code></pre>
 </notextile>
 
-Verify that @arv-git-httpd@ and @git-http-backend@ are functional:
+Verify that @arvados-git-httpd@ and @git-http-backend@ are functional:
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv-git-httpd -h</span>
+<pre><code>~$ <span class="userinput">arvados-git-httpd -h</span>
 Usage of arv-git-httpd:
   -address="0.0.0.0:80": Address to listen on, "host:port".
   -git-command="/usr/bin/git": Path to git executable. Each authenticated request will execute this program with a single argument, "http-backend".
@@ -49,27 +49,27 @@ fatal: No REQUEST_METHOD from server
 </code></pre>
 </notextile>
 
-We recommend running @arv-git-httpd@ under "runit":http://smarden.org/runit/ or something similar.
+We recommend running @arvados-git-httpd@ under "runit":http://smarden.org/runit/ or something similar.
 
 Your @run@ script should look something like this:
 
 <notextile>
 <pre><code>export ARVADOS_API_HOST=<span class="userinput">uuid_prefix</span>.your.domain
-exec sudo -u git arv-git-httpd -address=:9001 -git-command="$(which git)" -repo-root=<span class="userinput">/var/lib/arvados/git</span> 2>&1
+exec sudo -u git arvados-git-httpd -address=:9001 -git-command="$(which git)" -repo-root=<span class="userinput">/var/lib/arvados/git</span> 2>&1
 </code></pre>
 </notextile>
 
 h3. Set up a reverse proxy with SSL support
 
-The arv-git-httpd service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+The arvados-git-httpd service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
 
-This is best achieved by putting a reverse proxy with SSL support in front of arv-git-httpd, running on port 443 and passing requests to arv-git-httpd on port 9001 (or whatever port you chose in your run script).
+This is best achieved by putting a reverse proxy with SSL support in front of arvados-git-httpd, running on port 443 and passing requests to arvados-git-httpd on port 9001 (or whatever port you chose in your run script).
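+
+A minimal nginx sketch of such a proxy (hostname, certificate paths, and listen address are placeholders to adapt; the upstream port must match your run script):
+
+<notextile>
+<pre><code>server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  git.<span class="userinput">uuid_prefix.your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  location / {
+    proxy_pass http://127.0.0.1:9001;
+  }
+}
+</code></pre>
+</notextile>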
 
-h3. Tell the API server about the arv-git-httpd service
+h3. Tell the API server about the arvados-git-httpd service
 
 In your API server's @config/application.yml@ file, add the following entry:
 
 <notextile>
-<pre><code>git_http_base: git.<span class="userinput">uuid_prefix</span>.your.domain
+<pre><code>git_http_base: git.<span class="userinput">uuid_prefix.your.domain</span>
 </code></pre>
 </notextile>
index 767b8e3b8138a418b276ba9c6e51c48c53b1c8ad..f6ad379023e82c830d36b193db57cc199b696b9e 100644 (file)
@@ -24,12 +24,30 @@ On Red Hat-based systems:
 
 {% include 'note_python27_sc' %}
 
-h2. Set up Docker
+h2. Install Docker
 
 Compute nodes must have Docker installed to run jobs inside containers.  This requires a relatively recent version of Linux (at least upstream version 3.10, or a distribution version with the appropriate patches backported).  Follow the "Docker Engine installation documentation":https://docs.docker.com/ for your distribution.
 
 For Debian-based systems, the Arvados package repository includes a backported @docker.io@ package with a known-good version you can install.
 
+h2. Configure Docker
+
+Crunch runs jobs in Docker containers with relatively little configuration.  You may need to start the Docker daemon with specific options to make sure these jobs run smoothly in your environment.  This section highlights options that are useful to most installations.  Refer to the "Docker daemon reference":https://docs.docker.com/reference/commandline/daemon/ for complete information about all available options.
+
+The best way to configure these options varies by distribution.
+
+* If you're using our backported @docker.io@ package, you can list these options in the @DOCKER_OPTS@ setting in @/etc/default/docker.io@.
+* If you're using another Debian-based package, you can list these options in the @DOCKER_OPTS@ setting in @/etc/default/docker@.
+* On Red Hat-based distributions, you can list these options in the @other_args@ setting in @/etc/sysconfig/docker@.
+
+h3. Default ulimits
+
+Docker containers inherit ulimits from the Docker daemon.  However, the ulimits for a single Unix daemon may not accommodate a long-running Crunch job.  You may want to increase default limits for compute jobs by passing @--default-ulimit@ options to the Docker daemon.  For example, to allow jobs to open 10,000 files, set @--default-ulimit nofile=10000:10000@.
+
+h3. DNS
+
+Your containers must be able to resolve the hostname in the ARVADOS_API_HOST environment variable (provided by the Crunch dispatcher) and any hostnames returned in Keep service records.  If these names are not in public DNS records, you may need to set a DNS resolver for the containers by passing the @--dns@ option with the IP address of an appropriate nameserver.  You may specify this option more than once to use multiple nameservers.
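+
+For example, a hypothetical @/etc/default/docker.io@ applying both of the settings above (the flags are standard Docker daemon options; the nameserver address is a placeholder):
+
+<notextile>
+<pre><code>DOCKER_OPTS="--default-ulimit nofile=10000:10000 --dns <span class="userinput">10.0.0.53</span>"
+</code></pre>
+</notextile>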
+
 h2. Set up SLURM
 
 Install SLURM following "the same process you used to install the Crunch dispatcher":{{ site.baseurl }}/install/install-crunch-dispatch.html#slurm.
index 374428919dc714045009e25b8dccb26deced6f8e..1566e80a9cc4a7f6519793f101cc5662177e6f40 100644 (file)
@@ -11,7 +11,11 @@ h2. Prerequisites
 # A GNU/Linux (virtual) machine
 # A working Docker installation (see "Installing Docker":https://docs.docker.com/installation/)
 # A working Go installation (see "Install the Go tools":https://golang.org/doc/install)
-# A working Ruby installation (see "Install Ruby and bundler":install-manual-prerequisites-ruby.html)
+# A working Ruby installation, with the Bundler gem installed
+
+h3. Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
 
 h2. Download the source tree
 
diff --git a/doc/install/install-manual-prerequisites-ruby.html.textile.liquid b/doc/install/install-manual-prerequisites-ruby.html.textile.liquid
deleted file mode 100644 (file)
index 2142aca..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Install Ruby and bundler
-...
-
-Currently, only Ruby 2.1 is supported.
-
-h2(#rvm). Option 1: Install with rvm
-
-<notextile>
-<pre><code>~$ <span class="userinput">gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3</span>
-~$ <span class="userinput">\curl -sSL https://get.rvm.io | bash -s stable --ruby=2.1</span>
-~$ <span class="userinput">gem install bundler
-</span></code></pre></notextile>
-
-h2(#fromsource). Option 2: Install from source
-
-<notextile>
-<pre><code><span class="userinput">mkdir -p ~/src
-cd ~/src
-wget http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz
-tar xzf ruby-2.1.6.tar.gz
-cd ruby-2.1.6
-./configure --no-rdoc --no-ri
-make
-sudo make install
-
-sudo gem install bundler</span>
-</code></pre></notextile>
index afe2830b8d385521cdae20aa0e59f8847402d4f7..3087514c1236fc3b1011545b5e4b6f5d5660a92f 100644 (file)
@@ -40,44 +40,43 @@ baseurl=http://rpm.arvados.org/CentOS/$releasever/os/$basearch/
 </code></pre>
 </notextile>
 
-h3. Debian
+h3. Debian and Ubuntu
 
-Packages are available for Debian 7 ("wheezy").  First, register the Curoverse signing key in apt's database:
+Packages are available for Debian 7 ("wheezy"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
+
+First, register the Curoverse signing key in apt's database:
 
 {% include 'install_debian_key' %}
 
-Then save the configuration line listed for your version of Debian in @/etc/apt/sources.list.d/arvados.list@:
+Configure apt to retrieve packages from the Arvados package repository. This command depends on your OS vendor and version:
 
 table(table table-bordered table-condensed).
-|*Debian version*|*@/etc/apt/sources.list.d/arvados.list@*|
-|7 ("wheezy")|@deb http://apt.arvados.org/ wheezy main@|
+|OS version|Command|
+|Debian 7 ("wheezy")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 12.04 ("precise")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ precise main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 14.04 ("trusty")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 
-Finally, add this new repository to apt's database:
+{% include 'notebox_begin' %}
 
-<notextile>
-<pre><code>~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-</code></pre>
-</notextile>
+Arvados packages for Ubuntu may depend on third-party packages in Ubuntu's "universe" repository.  If you're installing on Ubuntu, make sure you have the universe sources uncommented in @/etc/apt/sources.list@.
 
-h3. Ubuntu
-
-Packages are available for Ubuntu 12.04 ("precise").  First, register the Curoverse signing key in apt's database:
-
-{% include 'install_debian_key' %}
+{% include 'notebox_end' %}
 
-Then save the configuration line listed for your version of Ubuntu in @/etc/apt/sources.list.d/arvados.list@:
+Retrieve the package list:
 
-table(table table-bordered table-condensed).
-|*Ubuntu version*|*@/etc/apt/sources.list.d/arvados.list@*|
-|12.04 ("precise")|@deb http://apt.arvados.org/ precise main@|
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get update</span>
+</code></pre>
+</notextile>
 
 h2. A unique identifier
 
-Each Arvados installation should have a globally unique identifier, which is a unique 5-character alphanumeric string. Here is a snippet of ruby that generates such a string based on the hostname of your computer:
+Each Arvados installation should have a globally unique identifier, which is a unique 5-character lowercase alphanumeric string. For testing purposes, here is one way to make a random 5-character string:
 
-<pre>
-Digest::MD5.hexdigest(`hostname`).to_i(16).to_s(36)[0..4]
-</pre>
+<notextile>
+<pre><code>~$ <span class="userinput">tr -dc 0-9a-z &lt;/dev/urandom | head -c5; echo</span>
+</code></pre>
+</notextile>
 
 You may also use a different method to pick the unique identifier. The unique identifier will be part of the hostname of the services in your Arvados cluster. The rest of this documentation will refer to it as your @uuid_prefix@.
 
index 1e72e5eec8ec4b34dc3257e1e429ddbeb8e63719..71e6b01f204bfc1a80fd1f972b0a9d15526cd164 100644 (file)
@@ -43,3 +43,16 @@ On Red Hat-based systems:
 </notextile>
 
 {% include 'note_python27_sc' %}
+
+h2. Update Git Config
+
+Configure git to use the ARVADOS_API_TOKEN environment variable to authenticate to arvados-git-httpd.
+
+Execute the following commands to set up the needed configuration.
+
+<notextile>
+<pre>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.username' none</span></code>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.helper' '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'</span></code>
+</pre>
+</notextile>
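+
+The second command installs a small shell credential helper: it discards the prompt details git sends on stdin and, for @get@ requests, replies with the token from @ARVADOS_API_TOKEN@.  With both settings in place, HTTPS git operations against this host authenticate automatically, for example (the repository name here is only illustrative):
+
+<notextile>
+<pre><code>~$ <span class="userinput">git clone https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</span>
+</code></pre>
+</notextile>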
index c078e46f00f431f3a85a4c76e3c2e49d6a3fff78..4fe1fb157b903ea30447114477e02a2f1c1947ae 100644 (file)
@@ -6,7 +6,17 @@ title: Install Single Sign On (SSO) server
 
 h2(#dependencies). Install dependencies
 
-Make sure you have "Ruby and Bundler":install-manual-prerequisites-ruby.html installed.
+h3(#install_git_curl). Install git and curl
+
+{% include 'install_git_curl' %}
+
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#install_postgres). Install PostgreSQL
+
+{% include 'install_postgres' %}
 
 h2(#install). Install SSO server
 
@@ -16,7 +26,7 @@ h3. Get SSO server code and run bundle
 <pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
 ~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
 ~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">bundle install</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">bundle install --without=development</span>
 </code></pre></notextile>
 
 h2. Configure the SSO server
@@ -27,13 +37,20 @@ First, copy the example configuration file:
 <pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
 </code></pre></notextile>
 
-The SSO server reads the @config/application.yml@ file, as well as the @config/application.defaults.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.defaults.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience, only.
+The SSO server reads the @config/application.yml@ file, as well as the @config/application.defaults.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.defaults.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience only.
 
-Consult @config/application.default.yml@ for a full list of configuration options. Always put your local configuration in @config/application.yml@, never edit @config/application.default.yml@.
+Consult @config/application.default.yml@ for a full list of configuration options.  Put local configuration in @config/application.yml@; do not edit @config/application.default.yml@.
 
 h3(#uuid_prefix). uuid_prefix
 
-Define your @uuid_prefix@ in @config/application.yml@ by setting the @uuid_prefix@ field in the section for your environment.  This prefix is used for all database identifiers to identify the record as originating from this site.  It must be exactly 5 alphanumeric characters (lowercase ASCII letters and digits).
+Generate a uuid prefix for the single sign on service.  This prefix is used to identify user records as originating from this site.  It must be exactly 5 characters long and consist only of lowercase ASCII letters and digits.  You may use the following snippet to generate a uuid prefix:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
+abcde
+</code></pre></notextile>
+
+Edit @config/application.yml@ and set @uuid_prefix@ in the "common" section.
 
 h3(#secret_token). secret_token
 
@@ -44,84 +61,11 @@ Generate a new secret token for signing cookies:
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
 </code></pre></notextile>
 
-Then put that value in the @secret_token@ field.
-
-h3(#authentication_methods). Authentication methods
-
-Three authentication methods are supported: google OAuth2, ldap, local accounts.
-
-h3(#google_oauth2). google_oauth2 authentication
-
-Google OAuth2 authentication can be configured with these options.
-
-<pre>
-  # Google API tokens required for OAuth2 login.
-  #
-  # See https://github.com/zquestz/omniauth-google-oauth2
-  #
-  # and https://developers.google.com/accounts/docs/OAuth2
-  google_oauth2_client_id: false
-  google_oauth2_client_secret: false
-
-  # Set this to your OpenId 2.0 realm to enable migration from Google OpenId
-  # 2.0 to Google OAuth2 OpenId Connect (Google will provide OpenId 2.0 user
-  # identifiers via the openid.realm parameter in the OAuth2 flow until 2017).
-  google_openid_realm: false
-</pre>
-
-h3(#ldap). ldap authentication
-
-LDAP authentication can be configured with these options. Make sure to preserve the indentation of the fields beyond @use_ldap@.
-
-<pre>
-  # Enable LDAP support.
-  #
-  # If you want to use LDAP, you need to provide
-  # the following set of fields under the use_ldap key.
-  #
-  # use_ldap: false
-  #   title: Example LDAP
-  #   host: ldap.example.com
-  #   port: 636
-  #   method: ssl
-  #   base: "ou=Users, dc=example, dc=com"
-  #   uid: uid
-  #   email_domain: example.com
-  #   #bind_dn: "some_user"
-  #   #password: "some_password"
-  use_ldap: false
-</pre>
-
-h3(#local_accounts). local account authentication
-
-If neither Google OAuth2 nor LDAP are enabled, the SSO server automatically
-falls back to local accounts. There are two configuration options for local
-accounts:
+Edit @config/application.yml@ and set @secret_token@ in the "common" section.
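+
+A sketch of @config/application.yml@ after both edits described above (the "common" section name comes from the example file; the values are the ones you generated):
+
+<pre>
+common:
+  uuid_prefix: abcde
+  secret_token: zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+</pre>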
 
-<pre>
-  # If true, allow new creation of new accounts in the SSO server's internal
-  # user database.
-  allow_account_registration: false
-
-  # If true, send an email confirmation before activating new accounts in the
-  # SSO server's internal user database.
-  require_email_confirmation: false
-</pre>
+h2(#database). Set up the database
 
-You can also create local accounts on the SSO server from the rails console:
-
-<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
-:001 &gt; <span class="userinput">user = User.new(:email =&gt; "test@example.com")</span>
-:002 &gt; <span class="userinput">user.password = "passw0rd"</span>
-:003 &gt; <span class="userinput">user.save!</span>
-:004 &gt; <span class="userinput">quit</span>
-</code></pre>
-</notextile>
-
-h2. Set up the database
-
-Generate a new database password. Nobody ever needs to memorize it or type it, so we'll make a strong one:
+Generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
 
 <notextile>
 <pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
@@ -136,10 +80,10 @@ Enter password for new role: <span class="userinput">paste-database-password-you
 Enter it again: <span class="userinput">paste-database-password-you-generated</span>
 </code></pre></notextile>
 
-Configure SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.
+Configure the SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.  If you are planning a production system, update the @production@ section; otherwise use @development@.
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.sample config/database.yml</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.example config/database.yml</span>
 ~/sso-devise-omniauth-provider$ <span class="userinput">edit config/database.yml</span>
 </code></pre></notextile>
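+
+After editing, the @production@ section might look like this sketch (the adapter and layout follow Rails @database.yml@ conventions; the database and role names match the createdb step below):
+
+<pre>
+production:
+  adapter: postgresql
+  encoding: utf8
+  database: arvados_sso_production
+  username: arvados_sso
+  password: paste-database-password-you-generated
+  host: localhost
+</pre>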
 
@@ -152,23 +96,14 @@ Create and initialize the database. If you are planning a production system, cho
 Alternatively, if the database user you intend to use for the SSO server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the rake db:setup command above:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">su postgres createdb arvados_sso_production -E UTF8 -O arvados_sso</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:structure:load</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:schema:load</span>
 ~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
 </code></pre></notextile>
 
-h2(#client). Generate assets
-
-If you are running in the production environment, you'll want to generate the assets:
-
-<notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
-</code></pre>
-</notextile>
-
 h2(#client). Create arvados-server client
 
-Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the @APP_ID@ and @APP_SECRET@ that must be set in in "Setting up Omniauth in the API server.":install-api-server.html#omniauth
+Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the values for @sso_app_id@ and @sso_app_secret@ in the "API server's SSO settings.":install-api-server.html#omniauth
 
 <notextile>
 <pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
@@ -183,17 +118,121 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 </code></pre>
 </notextile>
 
-h2. Start the SSO server
+h2(#assets). Precompile assets
+
+If you are running in the production environment, you must precompile the assets:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
+</code></pre>
+</notextile>
+
+h2(#authentication_methods). Authentication methods
+
+Authentication methods are configured in @application.yml@.  Currently three authentication methods are supported: local accounts, LDAP, and Google+.  If neither Google+ nor LDAP is enabled, the SSO server defaults to local user accounts.  Only one authentication mechanism should be in use at a time.
 
-h3. Run a simple standalone server
+h3(#local_accounts). Local account authentication
 
-You can use the Webrick server that is bundled with Ruby to quickly verify that your installation is functioning:
+There are two configuration options for local accounts:
+
+<pre>
+  # If true, allow creation of new accounts in the SSO server's internal
+  # user database.
+  allow_account_registration: false
+
+  # If true, send an email confirmation before activating new accounts in the
+  # SSO server's internal user database (otherwise users are activated immediately).
+  require_email_confirmation: false
+</pre>
+
+For more information about configuring backend support for sending email (required to send email confirmations), see "Configuring Action Mailer":http://guides.rubyonrails.org/configuring.html#configuring-action-mailer
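+
+As a minimal sketch, assuming delivery via an SMTP relay (the host and credentials are placeholders, not values shipped with the SSO server), the Rails environment configuration would contain something like:
+
+<pre>
+  config.action_mailer.delivery_method = :smtp
+  config.action_mailer.smtp_settings = {
+    address: "smtp.example.com",
+    port: 587,
+    user_name: "sso",
+    password: "xxxxxxxx",
+    authentication: :plain,
+  }
+</pre>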
+
+If @allow_account_registration@ is false, you may manually create local accounts on the SSO server from the rails console:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails server</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+:001 &gt; <span class="userinput">user = User.new(:email =&gt; "test@example.com")</span>
+:002 &gt; <span class="userinput">user.password = "passw0rd"</span>
+:003 &gt; <span class="userinput">user.save!</span>
+:004 &gt; <span class="userinput">quit</span>
+</code></pre>
+</notextile>
+
+h3(#ldap). LDAP authentication
+
+The following options are available to configure LDAP authentication.  Note that you must preserve the indentation of the fields listed under @use_ldap@.
+
+<pre>
+  use_ldap:
+    title: Example LDAP
+    host: ldap.example.com
+    port: 636
+    method: ssl
+    base: "ou=Users, dc=example, dc=com"
+    uid: uid
+    email_domain: example.com
+    #bind_dn: "some_user"
+    #password: "some_password"
+</pre>
+
+table(table).
+|_. Option|_. Description|
+|title |Title displayed to the user on the login page|
+|host  |LDAP server hostname|
+|port  |LDAP server port|
+|method|One of "plain", "ssl", "tls"|
+|base  |Directory lookup base|
+|uid   |User id field used for directory lookup|
+|email_domain|Strip off specified email domain from login and perform lookup on bare username|
+|bind_dn|If required by the server, username to log in with before performing the directory lookup|
+|password|If required by the server, password to log in with before performing the directory lookup|
+
+h3(#google). Google+ authentication
+
+In order to use Google+ authentication, you must use the <a href="https://console.developers.google.com" target="_blank">Google Developers Console</a> to create a set of client credentials.
+
+# Go to the <a href="https://console.developers.google.com" target="_blank">Google Developers Console</a> and select or create a project; this will take you to the project page.
+# On the sidebar, click on *APIs & auth* then select *APIs*.
+## Search for *Contacts API* and click on *Enable API*.
+## Search for *Google+ API* and click on *Enable API*.
+# On the sidebar, click on *Credentials*; under *OAuth* click on *Create new Client ID* to bring up the *Create Client ID* dialog box.
+# Under *Application type* select *Web application*.
+# If the authorization origins are not displayed, clicking on *Create Client ID* will take you to *Consent screen* settings.
+## On consent screen settings, enter the appropriate details and click on *Save*.
+## This will return you to the *Create Client ID* dialog box.
+# You must set the authorization origins.  Replace @sso.your-site.com@ with the appropriate hostname that you will use to access the SSO service:
+## JavaScript origin should be @https://sso.your-site.com/@
+## Redirect URI should be @https://sso.your-site.com/users/auth/google_oauth2/callback@
+# Copy the values of *Client ID* and *Client secret* from the Google Developers Console into the Google section of @config/application.yml@, like this:
+
+<notextile>
+<pre><code>  # Google API tokens required for OAuth2 login.
+  google_oauth2_client_id: <span class="userinput">"---YOUR---CLIENT---ID---HERE---"</span>
+  google_oauth2_client_secret: <span class="userinput">"---YOUR---CLIENT---SECRET---HERE---"</span></code></pre></notextile>
+
+h2(#start). Start the SSO server
+
+h3. Run a standalone Passenger server
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production passenger start</span>
+=============== Phusion Passenger Standalone web server started ===============
+...
 </code></pre>
 </notextile>
 
-h3. Production environment
+You can now test your installation by going to the page reported by Passenger as "Accessible via: ...".
+
+Note: if you get the following warning, "you may safely ignore it":https://stackoverflow.com/questions/10374871/no-secret-option-provided-to-racksessioncookie-warning
 
-As a Ruby on Rails application, the SSO server should be compatible with any Ruby application server that supports Rack applications.  We recommend "Passenger":https://www.phusionpassenger.com/ to run the SSO server in production.
+<pre>
+Connecting to database specified by database.yml
+App 4574 stderr:         SECURITY WARNING: No secret option provided to Rack::Session::Cookie.
+App 4574 stderr:         This poses a security threat. It is strongly recommended that you
+App 4574 stderr:         provide a secret to prevent exploits that may be possible from crafted
+App 4574 stderr:         cookies. This will not be supported in future versions of Rack, and
+App 4574 stderr:         future versions will even invalidate your existing user cookies.
+App 4574 stderr:
+App 4574 stderr:         Called from: /var/lib/gems/2.1.0/gems/actionpack-3.2.8/lib/action_dispatch/middleware/session/abstract_store.rb:28:in `initialize'.
+App 4592 stdout:
+</pre>
index 1a65f2de38235c529713e66c6d33085e710a990f..662a5e5ebc9d5f3620fee11d8a01d37b86551428 100644 (file)
@@ -6,13 +6,13 @@ title: Install Workbench
 
 h2. Install prerequisites
 
-The Arvados package repository includes Workbench server package that can help automate much of the deployment.  It requires:
+The Arvados package repository includes a Workbench server package that can help automate much of the deployment.
 
-* "Ruby 2.1 and bundler":install-manual-prerequisites-ruby.html
-* The Arvados Python SDK
-* Graphviz
-* Build tools to build gem dependencies
-* Nginx
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#build_tools_workbench). Build tools
 
 Workbench doesn't need its own database, so it does not need to have PostgreSQL installed.
 
index df5507702443103be54c9db56122404c9de9b05b..9db56b9bbd1bdad70711e0505a0c6fdb85634fd7 100644 (file)
@@ -8,9 +8,9 @@ title: "Installation"
 
 To use the @arv@ command, you can either install the @arvados-cli@ gem via RubyGems or build and install the package from source.
 
-h4. Prerequisites: Ruby &gt;= 2.1.0 and curl libraries
+h3. Prerequisites: Ruby, Bundler, and curl libraries
 
-Make sure you have "Ruby and bundler":{{site.baseurl}}/install/install-manual-prerequisites-ruby.html installed.
+{% include 'install_ruby_and_bundler' %}
 
 Install curl libraries with your system's package manager. For example, on Debian or Ubuntu:
 
@@ -20,7 +20,7 @@ $ <code class="userinput">sudo apt-get install libcurl3 libcurl3-gnutls libcurl4
 </pre>
 </notextile>
 
-h4. Option 1: install with RubyGems
+h3. Option 1: Install with RubyGems
 
 <notextile>
 <pre>
@@ -28,7 +28,7 @@ $ <code class="userinput">sudo gem install arvados-cli</code>
 </pre>
 </notextile>
 
-h4. Option 2: build and install from source
+h3. Option 2: Build and install from source
 
 <notextile>
 <pre>
index c7655ba78e9a22b7068d02fba1ea9565df5f9d13..3184c0929c2add50e6140a0f7d94696418ad32c2 100644 (file)
@@ -387,7 +387,7 @@ optional arguments:
                         exit
   --local               Run locally using arv-run-pipeline-instance
   --docker-image DOCKER_IMAGE
-                        Docker image to use, default arvados/jobs
+                        Docker image to use, otherwise use instance default.
   --ignore-rcode        Commands that return non-zero return codes should not
                         be considered failed.
   --no-reuse            Do not reuse past jobs.
index 46156b7f85e2b5e102b42cb5d4ef158aab18c79d..4a5105bdd8bbb0e9c538e1281b586501b46fb742 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: "Checking your environment"
 ...
 
-First, log into an Arvados VM instance (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or install the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation.
+First, log into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or install the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation.
 
 Check that you are able to access the Arvados API server using @arv user current@.  If it is able to access the API server, it will print out information about your account:
 
index 83513b8a26a999707f83057ed27af34d19be2832..a9eb8c135943fd58a9f7b9d91627af19059386a0 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Accessing an Arvados VM with SSH - Unix Environments
 ...
 
-This document is for Unix environments (Linux, OS X, Cygwin). If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
+This document is for accessing an Arvados VM using SSH keys in Unix environments (Linux, OS X, Cygwin). If you would like to access the VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
 
 {% include 'ssh_intro' %}
 
index 7a9ab27d39f46025ecdc6a69e5075ae1317989df..c3a06405493d9c340b010547e88057aacb6039bf 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Accessing an Arvados VM with SSH - Windows Environments
 ...
 
-This document is for Windows environments. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
+This document is for accessing an Arvados VM using SSH keys in Windows environments. If you would like to access the VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
 
 {% include 'ssh_intro' %}
 
diff --git a/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid b/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid
new file mode 100644 (file)
index 0000000..bfe6686
--- /dev/null
@@ -0,0 +1,19 @@
+---
+layout: default
+navsection: userguide
+title: Accessing an Arvados VM with Webshell
+...
+
+This document describes how to access an Arvados VM with Webshell from Workbench.
+
+h2(#webshell). Access VM using webshell
+
+Webshell gives you access to an Arvados virtual machine from your browser with no additional setup.
+
+In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Manage account* to go to the account management page. In the *Manage account* page, you will see the *Virtual Machines* panel that lists the virtual machines you can access.
+
+Each row in the Virtual Machines panel lists the hostname of the VM, along with a <code>Log in as *you*</code> button under the column "Web shell beta". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/vm-access-with-webshell.png!
+
+You are now ready to work in your Arvados VM.
index f9f8af88362ccadbf89816d41af01230c7d512e9..49cdb9065f61cf164cf726769579930614aec1e5 100644 (file)
@@ -8,7 +8,7 @@ The Arvados API token is a secret key that enables the @arv@ command line client
 
 Access the Arvados Workbench using this link: "{{site.arvados_workbench_host}}/":{{site.arvados_workbench_host}}/  (Replace the hostname portion with the hostname of your local Arvados instance if necessary.)
 
-Open a shell on the system where you want to use the Arvados client. This may be your local workstation, or an Arvados virtual machine accessed with SSH (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login).
+Open a shell on the system where you want to use the Arvados client. This may be your local workstation, or an Arvados virtual machine accessed with "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or SSH (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login).
 
 Click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access your account menu, then click on the menu item *Manage account* to go to the account management page. On the *Manage account* page, you will see the *Current Token* panel, which lists your current token and instructions to set up your environment.
 
diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid
new file mode 100644 (file)
index 0000000..b20b8bf
--- /dev/null
@@ -0,0 +1,80 @@
+---
+layout: default
+navsection: userguide
+title: "Using arv-copy"
+...
+
+
+This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@.
+
+{% include 'tutorial_expectations' %}
+
+h2. arv-copy
+
+@arv-copy@ allows users to copy collections, pipeline templates, and pipeline instances from one cluster to another. By default, @arv-copy@ will recursively go through a template or instance and copy all dependencies associated with the object.
+
+For example, let's copy from our <a href="https://cloud.curoverse.com/">beta cloud instance *qr1hi*</a> to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangeable with any cluster name. You can find the cluster name from the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi.
+
+In order for the clusters to be able to communicate with each other, you must create custom configuration files for both clusters. The names of the files must have the format of *uuid_prefix.conf*. In our example, let's make two files, one for *qr1hi* and one for *dst_cluster*. From your *Manage account* page in *qr1hi* and *dst_cluster*, copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/api-token-host.png!
+
+Create these files in @~/.config/arvados/@ on the shell account from which you will run the @arv-copy@ commands (for example, the default shell you may have access to is shell.qr1hi), and copy your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ values into them as shown below.
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd ~/.config/arvados</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=123456789abcdefghijkl" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=dst_cluster.arvadosapi.com" >> dst_cluster.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=987654321lkjihgfedcba" >> dst_cluster.conf</span>
+</code></pre>
+</notextile>
+
+Now you're ready to copy between *qr1hi* and *dst_cluster*!
+
+h3. How to copy a collection
+
+First, select the uuid of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or from the URL bar (the part after @collections/...@).
+
+Now copy the collection from *qr1hi* to *dst_cluster*. We will use the uuid @qr1hi-4zz18-tci4vn4fa95w0zx@ as an example. You can find this collection in the <a href="https://cloud.curoverse.com/collections/qr1hi-4zz18-tci4vn4fa95w0zx">lobSTR v.3 project on cloud.curoverse.com</a>.
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster qr1hi-4zz18-tci4vn4fa95w0zx</span>
+qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0% 
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dst_cluster-4zz18-8765943210cdbae
+</code></pre>
+</notextile>
+
+The output of arv-copy displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in a pre-created project, you can specify the destination project using the @--project-uuid@ option followed by the project uuid.
+
+For example, this will copy the collection to project dst_cluster-j7d0g-a894213ukjhal12 in the destination cluster.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx</span>
+</code></pre>
+</notextile>
+
+h3. How to copy a pipeline template or pipeline instance
+
+{% include 'arv_copy_expectations' %}
+
+We will use the uuid @qr1hi-d1hrv-nao0ohw8y7dpf84@ as an example pipeline instance.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
+ * [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
+arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-d1hrv-rym2h5ub9m8ofwj
+</code></pre>
+</notextile>
+
+New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d and came from the @arvados@ repository, your new branch will be named @git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@, as in the output above.
+
+By default, if you copy a pipeline instance recursively, you will find that the template as well as all the dependencies are in your home project.
+
+If you would like to copy the object without dependencies, you can use the @--no-recursive@ flag.
+
+For example, we can copy the same object using this flag:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+</code></pre>
+</notextile>
index 9eabfacf70734ca4c53123a34945b146092c8c07..8d1aca63057f44f22abb5811c2ef73c8478e4129 100644 (file)
@@ -134,7 +134,7 @@ Running pipeline qr1hi-d1hrv-slcnhq5czo764b1
 
 h2. Additional options
 
-* @--docker-image IMG@ : By default, commands run inside a Docker container created from the latest "arvados/jobs" Docker image.  Use this option to specify a different image to use.  Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
+* @--docker-image IMG@ : By default, commands run in a container created from the @default_docker_image_for_jobs@ setting on the API server.  Use this option to specify a different image to use.  Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
 * @--dry-run@ : Print out the final Arvados pipeline generated by @arv-run@ without submitting it.
 * @--local@ : By default, the pipeline will be submitted to your configured Arvados instance.  Use this option to run the command locally using @arv-run-pipeline-instance --run-jobs-here@.
 * @--ignore-rcode@ : Some commands use non-zero exit codes to indicate nonfatal conditions (e.g. @grep@ returns 1 when no match is found).  Set this to indicate that commands that return non-zero return codes should not be considered failed.
index e42f1298aee438aa5806c8b5c53afcf530858070..5057fda9905b07e371e27f56608f421985f79995 100644 (file)
@@ -23,7 +23,7 @@ On the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg
 
 In the *Manage account* page, you will see the *Repositories* panel with the *Add new repository* button.
 
-!{{ site.baseurl }}/images/repositories-panel.png!
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/repositories-panel.png!
 
 Click the *Add new Repository* button to open the popup to add a new arvados repository. You will see a text box where you can enter the name of the repository. Enter *tutorial* in this text box and click on *Create*.
 
@@ -31,10 +31,12 @@ Click the *Add new Repository* button to open the popup to add a new arvados rep
 The name you enter here must begin with a letter and can only contain alphanumeric characters.
 {% include 'notebox_end' %}
 
-!{{ site.baseurl }}/images/add-new-repository.png!
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/add-new-repository.png!
 
-This will create a new repository with the name @$USER/tutorial@ with the URL <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>
+This will create a new repository with the name @$USER/tutorial@. It can be accessed using the URL <notextile><code>https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</code></notextile> or <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
 
-Back in the *Repositories* panel in the *Manage account* page, you should see the @$USER/tutorial@ repository listed in the name column with the URL <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
+Back in the *Repositories* panel in the *Manage account* page, you should see the @$USER/tutorial@ repository listed in the name column with these URLs.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/added-new-repository.png!
 
 You are now ready to use this *tutorial* repository to run your crunch scripts.
index de607845d2800e4b19b6fa79f89e9b4d40a33e94..a77273763db045473c791c1e8d6dab42cf0c02c3 100644 (file)
@@ -24,19 +24,28 @@ All Crunch scripts are managed through the Git revision control system.  Before
 
 On the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Manage account* to go to the account management page.
 
-On the *Manage account* page, you will see *Repositories* panel. In this panel, you should see the @$USER/tutorial@ repository listed in the *name* column.  Next to *name* is the column *URL*. Copy the *URL* value associated with your repository.  This should look like <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
+On the *Manage account* page, you will see the *Repositories* panel. In this panel, you should see the @$USER/tutorial@ repository listed in the *name* column.  Next to *name* is the column *URL*. Copy the *URL* value associated with your repository.  This should look like <notextile><code>https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</code></notextile>. Alternatively, you can use <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>.
 
 Next, on the Arvados virtual machine, clone your Git repository:
 
 <notextile>
 <pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</span>
+~$ <span class="userinput">git clone https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</span>
 Cloning into 'tutorial'...</code></pre>
 </notextile>
 
 This will create a Git repository in the directory called @tutorial@ in your home directory. Say yes when prompted to continue with connection.
 Ignore any warning that you are cloning an empty repository.
 
+*Note:* If you are prompted for a username and password when you try to git clone using this command, you may first need to update your git configuration. Execute the following commands to do so.
+
+<notextile>
+<pre>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.username' none</span></code>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.helper' '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'</span></code>
+</pre>
+</notextile>
+
 {% include 'notebox_begin' %}
 For more information about using Git, try
 
index 6d014b91fa1665d031ea1caa283b576028c8ed70..75702960133b3be2555b851babb9d5bf92e4c9a3 100644 (file)
@@ -20,8 +20,14 @@ clean:
 DEBIAN_IMAGE := $(shell $(DOCKER) images -q arvados/debian |head -n1)
 
 REALCLEAN_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e arvados -e api_server -e keep_server -e keep_proxy_server -e doc_server -e workbench_server |cut -f 1 -d' ')
-REALCLEAN_IMAGES := $(shell $(DOCKER) images -q arvados/* |grep -v $(DEBIAN_IMAGE) 2>/dev/null)
-DEEPCLEAN_IMAGES := $(shell $(DOCKER) images -q arvados/*)
+# Generate a list of docker images tagged as arvados/*
+# but exclude those tagged as arvados/build
+ADI_TEMPFILE := $(shell mktemp)
+ARVADOS_DOCKER_IMAGES := $(shell $(DOCKER) images -q arvados/* |sort > $(ADI_TEMPFILE))
+ABDI_TEMPFILE := $(shell mktemp)
+ARVADOS_BUILD_DOCKER_IMAGES := $(shell $(DOCKER) images -q arvados/build |sort > $(ABDI_TEMPFILE))
+REALCLEAN_IMAGES := $(shell comm -3 $(ADI_TEMPFILE) $(ABDI_TEMPFILE) |grep -v $(DEBIAN_IMAGE) 2>/dev/null)
+DEEPCLEAN_IMAGES := $(shell comm -3 $(ADI_TEMPFILE) $(ABDI_TEMPFILE))
 SKYDNS_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e crosbymichael/skydns -e crosbymichael/skydock |cut -f 1 -d' ')
 SKYDNS_IMAGES := $(shell $(DOCKER) images -q crosbymichael/skyd*)
 
index c84d89bf4a5533c0549789fb88f19dd8de1b1ee2..fc5005ab9cf089a94f0fd5d1f74ff5cfd7220f04 100755 (executable)
@@ -126,7 +126,7 @@ my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
-my $docker_bin = "/usr/bin/docker.io";
+my $docker_bin = "docker.io";
 GetOptions('force-unlock' => \$force_unlock,
            'git-dir=s' => \$git_dir,
            'job=s' => \$jobspec,
@@ -169,8 +169,7 @@ if ($jobspec =~ /^[-a-z\d]+$/)
 }
 else
 {
-  $Job = JSON::decode_json($jobspec);
-  $local_job = 1;
+  $local_job = JSON::decode_json($jobspec);
 }
 
 
@@ -178,7 +177,7 @@ else
 # at least able to run basic commands: they aren't down or severely
 # misconfigured.
 my $cmd = ['true'];
-if ($Job->{docker_image_locator}) {
+if (($Job || $local_job)->{docker_image_locator}) {
   $cmd = [$docker_bin, 'ps', '-q'];
 }
 Log(undef, "Sanity check is `@$cmd`");
@@ -208,15 +207,15 @@ else
 {
   if (!$resume_stash)
   {
-    map { croak ("No $_ specified") unless $Job->{$_} }
+    map { croak ("No $_ specified") unless $local_job->{$_} }
     qw(script script_version script_parameters);
   }
 
-  $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
-  $Job->{'started_at'} = gmtime;
-  $Job->{'state'} = 'Running';
+  $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
+  $local_job->{'started_at'} = gmtime;
+  $local_job->{'state'} = 'Running';
 
-  $Job = api_call("jobs/create", job => $Job);
+  $Job = api_call("jobs/create", job => $local_job);
 }
 $job_id = $Job->{'uuid'};
 
@@ -396,7 +395,7 @@ if (!defined $no_clear_tmp) {
     # TODO: When #5036 is done and widely deployed, we can get rid of the
     # regular expression and just unmount everything with type fuse.keep.
     srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-          ['bash', '-ec', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+          ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
     exit (1);
   }
   while (1)
@@ -405,7 +404,10 @@ if (!defined $no_clear_tmp) {
     freeze_if_want_freeze ($cleanpid);
     select (undef, undef, undef, 0.1);
   }
-  Log (undef, "Cleanup command exited ".exit_status_s($?));
+  if ($?) {
+    Log(undef, "Clean work dirs: exit ".exit_status_s($?));
+    exit(EX_RETRY_UNLOCKED);
+  }
 }
 
 # If this job requires a Docker image, install that.
@@ -596,7 +598,7 @@ else {
   unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
     croak("`$gitcmd rev-list` exited "
           .exit_status_s($?)
-          .", '$treeish' not found. Giving up.");
+          .", '$treeish' not found, giving up");
   }
   $commit = $1;
   Log(undef, "Version $treeish is commit $commit");
@@ -866,13 +868,6 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         $command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
       }
 
-      # Dynamically configure the container to use the host system as its
-      # DNS server.  Get the host's global addresses from the ip command,
-      # and turn them into docker --dns options using gawk.
-      $command .=
-          q{$(ip -o address show scope global |
-              gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
-
       # The source tree and $destdir directory (which we have
       # installed on the worker host) are available in the container,
       # under the same path.
@@ -992,7 +987,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       update_progress_stats();
       select (undef, undef, undef, 0.1);
     }
-    elsif (time - $progress_stats_updated >= 30)
+    elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
     {
       update_progress_stats();
     }
@@ -1099,8 +1094,8 @@ sub update_progress_stats
   $progress_stats_updated = time;
   return if !$progress_is_dirty;
   my ($todo, $done, $running) = (scalar @jobstep_todo,
-                                scalar @jobstep_done,
-                                scalar @slot - scalar @freeslot - scalar @holdslot);
+                                 scalar @jobstep_done,
+                                 scalar keys(%proc));
   $Job->{'tasks_summary'} ||= {};
   $Job->{'tasks_summary'}->{'todo'} = $todo;
   $Job->{'tasks_summary'}->{'done'} = $done;
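
The cleanup path above now gives up with a distinctive exit status instead of logging the failure and carrying on. crunch-job reserves two codes for "not the job's fault, retry elsewhere" conditions; the values are pinned by @SPECIAL_EXIT@ in the new test file below. A small sketch of how a caller might branch on them (the dispatcher helper itself is hypothetical):

<notextile>
<pre><code># Exit statuses crunch-job reserves for retryable infrastructure failures
# (values match SPECIAL_EXIT in sdk/cli/test/test_crunch-job.rb below).
EX_TEMPFAIL = 75        # e.g. the node-level sanity check failed
EX_RETRY_UNLOCKED = 93  # job was unlocked; safe to retry from scratch

def classify(exitstatus):
    # Hypothetical dispatcher helper: any other nonzero status is a
    # genuine job failure and must not be retried.
    if exitstatus in (EX_TEMPFAIL, EX_RETRY_UNLOCKED):
        return 'retry'
    return 'success' if exitstatus == 0 else 'fail'
</code></pre>
</notextile>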
diff --git a/sdk/cli/test/binstub_clean_fail/mount b/sdk/cli/test/binstub_clean_fail/mount
new file mode 100755 (executable)
index 0000000..961ac28
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+echo >&2 Failing mount stub was called
+exit 1
diff --git a/sdk/cli/test/binstub_docker_noop/docker.io b/sdk/cli/test/binstub_docker_noop/docker.io
new file mode 100755 (executable)
index 0000000..af3a4e4
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+true
diff --git a/sdk/cli/test/binstub_sanity_check/docker.io b/sdk/cli/test/binstub_sanity_check/docker.io
new file mode 100755 (executable)
index 0000000..8f1569d
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 8
diff --git a/sdk/cli/test/binstub_sanity_check/true b/sdk/cli/test/binstub_sanity_check/true
new file mode 100755 (executable)
index 0000000..4b88b91
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 7
index 18bef403b761f52701fdc86b2919dac44de59e13..3dc4bdd434a101507fee3ebd8f2e5004e66cd49c 100644 (file)
@@ -7,8 +7,6 @@ class TestCollectionCreate < Minitest::Test
   end
 
   def test_small_collection
-    skip "Waiting unitl #4534 is implemented"
-
     uuid = Digest::MD5.hexdigest(foo_manifest) + '+' + foo_manifest.size.to_s
     out, err = capture_subprocess_io do
       assert_arv('--format', 'uuid', 'collection', 'create', '--collection', {
index 67dd399a2456fe4a7c2a2a2cf4d86401d409e6d6..5e58014cbfa10d3b9b67a8b7cddca8b8676f646c 100644 (file)
@@ -30,14 +30,10 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_dev_stdout
-    skip "Waiting unitl #4534 is implemented"
-
     test_file_to_stdout('/dev/stdout')
   end
 
   def test_file_to_stdout(specify_stdout_as='-')
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
     end
@@ -46,8 +42,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
@@ -58,34 +52,30 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file_no_overwrite_file
-    skip "Waiting unitl #4534 is implemented"
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
     end
-    assert_match /Error:/, err
+    assert_match /Local file tmp\/foo already exists/, err
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
 
   def test_file_to_file_no_overwrite_file_in_dir
-    skip "Waiting unitl #4534 is implemented"
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
     end
-    assert_match /Error:/, err
+    assert_match /Local file tmp\/foo already exists/, err
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
 
   def test_file_to_file_force_overwrite
-    skip "Waiting unitl #4534 is implemented"
-
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
@@ -99,8 +89,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file_skip_existing
-    skip "Waiting unitl #4534 is implemented"
-
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
@@ -114,8 +102,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_dir
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
@@ -142,28 +128,22 @@ class TestArvGet < Minitest::Test
   end
 
   def test_nonexistent_block
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
-      assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6'
+      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
     end
     assert_equal '', out
     assert_match /Error:/, err
   end
 
   def test_nonexistent_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
-      assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6/', 'tmp/'
+      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
     end
     assert_equal '', out
     assert_match /Error:/, err
   end
 
   def test_manifest_root_to_dir
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
@@ -174,8 +154,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_manifest_root_to_dir_noslash
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
@@ -186,8 +164,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_display_md5sum
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
@@ -198,8 +174,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_md5sum_nowrite
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
@@ -210,8 +184,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_sha1_nowrite
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
@@ -222,8 +194,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_block_to_file
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator, 'tmp/foo'
@@ -236,8 +206,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_create_directory_tree
-    skip "Waiting unitl #4534 is implemented"
-
     `rm -rf ./tmp/arv-get-test/`
     Dir.mkdir './tmp/arv-get-test'
     out, err = capture_subprocess_io do
@@ -249,8 +217,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_create_partial_directory_tree
-    skip "Waiting unitl #4534 is implemented"
-
     `rm -rf ./tmp/arv-get-test/`
     Dir.mkdir './tmp/arv-get-test'
     out, err = capture_subprocess_io do
index 73513db56cb17ee5f6d88d151205f437e6d22107..2f20e18440a2ff61dde6b748d3b327587530b142 100644 (file)
@@ -22,8 +22,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       r,w = IO.pipe
       wpid = fork do
@@ -41,8 +39,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--raw', './tmp/foo')
     end
@@ -52,8 +48,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_empty_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--raw', './tmp/empty_file')
     end
@@ -83,8 +77,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_filename_arg_with_empty_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--filename', 'foo', './tmp/empty_file')
     end
@@ -94,8 +86,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_as_stream
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--as-stream', './tmp/foo')
     end
@@ -105,8 +95,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_progress
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--manifest', '--progress', './tmp/foo')
     end
@@ -115,8 +103,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_batch_progress
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--manifest', '--batch-progress', './tmp/foo')
     end
@@ -136,20 +122,14 @@ class TestArvPut < Minitest::Test
   end
 
   def test_read_from_implicit_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin(specify_stdin_as='--manifest')
   end
 
   def test_read_from_dev_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin(specify_stdin_as='/dev/stdin')
   end
 
   def test_read_from_stdin(specify_stdin_as='-')
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       r,w = IO.pipe
       wpid = fork do
@@ -168,22 +148,16 @@ class TestArvPut < Minitest::Test
   end
 
   def test_read_from_implicit_stdin_implicit_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin_implicit_manifest(specify_stdin_as=nil,
                                            expect_filename='stdin')
   end
 
   def test_read_from_dev_stdin_implicit_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin_implicit_manifest(specify_stdin_as='/dev/stdin')
   end
 
   def test_read_from_stdin_implicit_manifest(specify_stdin_as='-',
                                              expect_filename=nil)
-    skip "Waiting unitl #4534 is implemented"
-
     expect_filename = expect_filename || specify_stdin_as.split('/').last
     out, err = capture_subprocess_io do
       r,w = IO.pipe
index 8c8d1d8331ae05fcbda64a65289732188c66bcd8..cac89b37bc0555c4929c6efadf873c32aed01297 100644 (file)
@@ -5,8 +5,6 @@ class TestRunPipelineInstance < Minitest::Test
   end
 
   def test_run_pipeline_instance_get_help
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       system ('arv-run-pipeline-instance -h')
     end
index a5a1c94fff29227e0944afcae08383529cfe0b33..f4eba4651cbcd06494c41d1e05311dac663f65ed 100644 (file)
@@ -9,7 +9,7 @@ end
 class TestArvTag < Minitest::Test
 
   def test_no_args
-    skip "Waiting unitl #4534 is implemented"
+    skip "Waiting until #4534 is implemented"
 
     # arv-tag exits with failure if run with no args
     out, err = capture_subprocess_io do
diff --git a/sdk/cli/test/test_crunch-job.rb b/sdk/cli/test/test_crunch-job.rb
new file mode 100644 (file)
index 0000000..22d756a
--- /dev/null
@@ -0,0 +1,126 @@
+require 'minitest/autorun'
+
+class TestCrunchJob < Minitest::Test
+  SPECIAL_EXIT = {
+    EX_RETRY_UNLOCKED: 93,
+    EX_TEMPFAIL: 75,
+  }
+
+  JOBSPEC = {
+    grep_local: {
+      script: 'grep',
+      script_version: 'master',
+      repository: File.absolute_path('../../../..', __FILE__),
+      script_parameters: {foo: 'bar'},
+    },
+  }
+
+  def setup
+  end
+
+  def crunchjob
+    File.absolute_path '../../bin/crunch-job', __FILE__
+  end
+
+  # Return environment suitable for running crunch-job.
+  def crunchenv opts={}
+    env = ENV.to_h
+    env['CRUNCH_REFRESH_TRIGGER'] =
+      File.absolute_path('../../../../tmp/crunch-refresh-trigger', __FILE__)
+    env
+  end
+
+  def jobspec label
+    JOBSPEC[label].dup
+  end
+
+  # Encode job record to json and run it with crunch-job.
+  #
+  # opts[:binstubs] is an array of X where ./binstub_X is added to
+  # PATH in order to mock system programs.
+  def tryjobrecord jobrecord, opts={}
+    env = crunchenv
+    (opts[:binstubs] || []).each do |binstub|
+      env['PATH'] = File.absolute_path('../binstub_'+binstub, __FILE__) + ':' + env['PATH']
+    end
+    system env, crunchjob, '--job', jobrecord.to_json
+  end
+
+  def test_bogus_json
+    out, err = capture_subprocess_io do
+      system crunchenv, crunchjob, '--job', '"}{"'
+    end
+    assert_equal false, $?.success?
+    # Must not conflict with our special exit statuses
+    assert_jobfail $?
+    assert_match /JSON/, err
+  end
+
+  def test_fail_sanity_check
+    out, err = capture_subprocess_io do
+      j = {}
+      tryjobrecord j, binstubs: ['sanity_check']
+    end
+    assert_equal 75, $?.exitstatus
+    assert_match /Sanity check failed: 7/, err
+  end
+
+  def test_fail_docker_sanity_check
+    out, err = capture_subprocess_io do
+      j = {}
+      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+      tryjobrecord j, binstubs: ['sanity_check']
+    end
+    assert_equal 75, $?.exitstatus
+    assert_match /Sanity check failed: 8/, err
+  end
+
+  def test_no_script_specified
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j.delete :script
+      tryjobrecord j
+    end
+    assert_match /No script specified/, err
+    assert_jobfail $?
+  end
+
+  def test_fail_clean_tmp
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      tryjobrecord j, binstubs: ['clean_fail']
+    end
+    assert_match /Failing mount stub was called/, err
+    assert_match /Clean work dirs: exit 1\n$/, err
+    assert_equal SPECIAL_EXIT[:EX_RETRY_UNLOCKED], $?.exitstatus
+  end
+
+  def test_docker_image_missing
+    skip 'API bug: it refuses to create this job in Running state'
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+      tryjobrecord j, binstubs: ['docker_noop']
+    end
+    assert_match /No Docker image hash found from locator/, err
+    assert_jobfail $?
+  end
+
+  def test_script_version_not_found_in_repository
+    bogus_version = 'f8b72707c1f5f740dbf1ed56eb429a36e0dee770'
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j[:script_version] = bogus_version
+      tryjobrecord j
+    end
+    assert_match /'#{bogus_version}' not found, giving up/, err
+    assert_jobfail $?
+  end
+
+  # Ensure procstatus is not interpreted as a temporary infrastructure
+  # problem. Would be assert_http_4xx if this were http.
+  def assert_jobfail procstatus
+    refute_includes SPECIAL_EXIT.values, procstatus.exitstatus
+    assert_equal false, procstatus.success?
+  end
+end
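
@tryjobrecord@ mocks system programs by prepending a @binstub_*@ directory to @PATH@ before spawning crunch-job, so the stub shadows the real @mount@ or @docker.io@. The same technique in Python, for illustration:

<notextile>
<pre><code># Run a child process with ./binstub_clean_fail/ shadowing system
# binaries, mirroring tryjobrecord's PATH manipulation above.
import os
import subprocess

env = os.environ.copy()
env['PATH'] = os.path.abspath('binstub_clean_fail') + ':' + env['PATH']
subprocess.call(['mount'], env=env)  # runs the stub, which exits 1
</code></pre>
</notextile>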
diff --git a/sdk/cwl/.gitignore b/sdk/cwl/.gitignore
new file mode 120000 (symlink)
index 0000000..1399fd4
--- /dev/null
@@ -0,0 +1 @@
+../python/.gitignore
\ No newline at end of file
diff --git a/sdk/cwl/README.rst b/sdk/cwl/README.rst
new file mode 100644 (file)
index 0000000..743b6c6
--- /dev/null
@@ -0,0 +1 @@
+Arvados Common Workflow Language (CWL) runner.
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
new file mode 100644 (file)
index 0000000..f3298ec
--- /dev/null
@@ -0,0 +1,295 @@
+#!/usr/bin/env python
+
+import argparse
+import arvados
+import arvados.events
+import arvados.commands.keepdocker
+import arvados.commands.run
+import cwltool.draft2tool
+import cwltool.workflow
+import cwltool.main
+import threading
+import cwltool.docker
+import fnmatch
+import logging
+import re
+import os
+from cwltool.process import get_feature
+
+logger = logging.getLogger('arvados.cwl-runner')
+logger.setLevel(logging.INFO)
+
+def arv_docker_get_image(api_client, dockerRequirement, pull_image):
+    if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
+        dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
+
+    sp = dockerRequirement["dockerImageId"].split(":")
+    image_name = sp[0]
+    image_tag = sp[1] if len(sp) > 1 else None
+
+    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
+                                                            image_name=image_name,
+                                                            image_tag=image_tag)
+
+    if not images:
+        imageId = cwltool.docker.get_image(dockerRequirement, pull_image)
+        args = [image_name]
+        if image_tag:
+            args.append(image_tag)
+        arvados.commands.keepdocker.main(args)
+
+    return dockerRequirement["dockerImageId"]
+
+class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
+    def __init__(self, basedir):
+        self.collections = {}
+        self.basedir = basedir
+
+    def get_collection(self, path):
+        p = path.split("/")
+        if arvados.util.keep_locator_pattern.match(p[0]):
+            if p[0] not in self.collections:
+                self.collections[p[0]] = arvados.collection.CollectionReader(p[0])
+            return (self.collections[p[0]], "/".join(p[1:]))
+        else:
+            return (None, path)
+
+    def _match(self, collection, patternsegments, parent):
+        ret = []
+        for filename in collection:
+            if fnmatch.fnmatch(filename, patternsegments[0]):
+                cur = os.path.join(parent, filename)
+                if len(patternsegments) == 1:
+                    ret.append(cur)
+                else:
+                    ret.extend(self._match(collection[filename], patternsegments[1:], cur))
+        return ret
+
+    def glob(self, pattern):
+        collection, rest = self.get_collection(pattern)
+        patternsegments = rest.split("/")
+        return self._match(collection, patternsegments, collection.manifest_locator())
+
+    def open(self, fn, mode):
+        collection, rest = self.get_collection(fn)
+        if collection:
+            return collection.open(rest, mode)
+        else:
+            return open(self._abs(fn), mode)
+
+    def exists(self, fn):
+        collection, rest = self.get_collection(fn)
+        if collection:
+            return collection.exists(rest)
+        else:
+            return os.path.exists(self._abs(fn))
+
+class ArvadosJob(object):
+    def __init__(self, runner):
+        self.arvrunner = runner
+        self.running = False
+
+    def run(self, dry_run=False, pull_image=True, **kwargs):
+        script_parameters = {
+            "command": self.command_line
+        }
+        runtime_constraints = {}
+
+        if self.generatefiles:
+            vwd = arvados.collection.Collection()
+            for t in self.generatefiles:
+                if isinstance(self.generatefiles[t], dict):
+                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
+                    vwd.copy(rest, t, source_collection=src)
+                else:
+                    with vwd.open(t, "w") as f:
+                        f.write(self.generatefiles[t])
+            vwd.save_new()
+            script_parameters["task.vwd"] = vwd.portable_data_hash()
+
+        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
+        if self.environment:
+            script_parameters["task.env"].update(self.environment)
+
+        if self.stdin:
+            script_parameters["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]
+
+        if self.stdout:
+            script_parameters["task.stdout"] = self.stdout
+
+        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
+        if docker_req and kwargs.get("use_container") is not False:
+            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
+            runtime_constraints["arvados_sdk_version"] = "master"
+
+        response = self.arvrunner.api.jobs().create(body={
+            "script": "run-command",
+            "repository": "arvados",
+            "script_version": "master",
+            "script_parameters": script_parameters,
+            "runtime_constraints": runtime_constraints
+        }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+
+        self.arvrunner.jobs[response["uuid"]] = self
+
+        logger.info("Job %s is %s", response["uuid"], response["state"])
+
+        if response["state"] in ("Complete", "Failed", "Cancelled"):
+            self.done(response)
+
+    def done(self, record):
+        try:
+            if record["state"] == "Complete":
+                processStatus = "success"
+            else:
+                processStatus = "permanentFail"
+
+            try:
+                outputs = {}
+                outputs = self.collect_outputs(record["output"])
+            except Exception as e:
+                logger.warn(str(e))
+                processStatus = "permanentFail"
+
+            self.output_callback(outputs, processStatus)
+        finally:
+            del self.arvrunner.jobs[record["uuid"]]
+
+class ArvPathMapper(cwltool.pathmapper.PathMapper):
+    def __init__(self, arvrunner, referenced_files, basedir, **kwargs):
+        self._pathmap = {}
+        uploadfiles = []
+
+        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+/.+')
+
+        for src in referenced_files:
+            if isinstance(src, basestring) and pdh_path.match(src):
+                self._pathmap[src] = (src, "/keep/%s" % src)
+            else:
+                ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+                st = arvados.commands.run.statfile("", ab)
+                if kwargs.get("conformance_test"):
+                    self._pathmap[src] = (src, ab)
+                elif isinstance(st, arvados.commands.run.UploadFile):
+                    uploadfiles.append((src, ab, st))
+                elif isinstance(st, arvados.commands.run.ArvFile):
+                    self._pathmap[src] = (ab, st.fn)
+                else:
+                    raise cwltool.workflow.WorkflowException("Input file path '%s' is invalid" % st)
+
+        if uploadfiles:
+            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles], arvrunner.api, dry_run=kwargs.get("dry_run"), num_retries=3)
+
+        for src, ab, st in uploadfiles:
+            self._pathmap[src] = (ab, st.fn)
+
+
+
+class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
+    def __init__(self, arvrunner, toolpath_object, **kwargs):
+        super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+        self.arvrunner = arvrunner
+
+    def makeJobRunner(self):
+        return ArvadosJob(self.arvrunner)
+
+    def makePathMapper(self, reffiles, input_basedir, **kwargs):
+        return ArvPathMapper(self.arvrunner, reffiles, input_basedir, **kwargs)
+
+
+class ArvCwlRunner(object):
+    def __init__(self, api_client):
+        self.api = api_client
+        self.jobs = {}
+        self.lock = threading.Lock()
+        self.cond = threading.Condition(self.lock)
+        self.final_output = None
+
+    def arvMakeTool(self, toolpath_object, **kwargs):
+        if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
+            return ArvadosCommandTool(self, toolpath_object, **kwargs)
+        else:
+            return cwltool.workflow.defaultMakeTool(toolpath_object, **kwargs)
+
+    def output_callback(self, out, processStatus):
+        if processStatus == "success":
+            logger.info("Overall job status is %s", processStatus)
+        else:
+            logger.warn("Overall job status is %s", processStatus)
+        self.final_output = out
+
+    def on_message(self, event):
+        if "object_uuid" in event:
+                if event["object_uuid"] in self.jobs and event["event_type"] == "update":
+                    if event["properties"]["new_attributes"]["state"] == "Running" and self.jobs[event["object_uuid"]].running is False:
+                        logger.info("Job %s is Running", event["object_uuid"])
+                        with self.lock:
+                            self.jobs[event["object_uuid"]].running = True
+                    elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled"):
+                        logger.info("Job %s is %s", event["object_uuid"], event["properties"]["new_attributes"]["state"])
+                        try:
+                            self.cond.acquire()
+                            self.jobs[event["object_uuid"]].done(event["properties"]["new_attributes"])
+                            self.cond.notify()
+                        finally:
+                            self.cond.release()
+
+    def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
+        events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
+
+        self.fs_access = CollectionFsAccess(input_basedir)
+
+        kwargs["fs_access"] = self.fs_access
+        kwargs["enable_reuse"] = args.enable_reuse
+
+        if kwargs.get("conformance_test"):
+            return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
+        else:
+            jobiter = tool.job(job_order,
+                               input_basedir,
+                               self.output_callback,
+                               **kwargs)
+
+            for runnable in jobiter:
+                if runnable:
+                    with self.lock:
+                        runnable.run(**kwargs)
+                else:
+                    if self.jobs:
+                        try:
+                            self.cond.acquire()
+                            self.cond.wait()
+                        finally:
+                            self.cond.release()
+                    else:
+                        logger.error("Workflow cannot make any more progress.")
+                        break
+
+            while self.jobs:
+                try:
+                    self.cond.acquire()
+                    self.cond.wait()
+                finally:
+                    self.cond.release()
+
+            events.close()
+
+            if self.final_output is None:
+                raise cwltool.workflow.WorkflowException("Workflow did not return a result.")
+
+            return self.final_output
+
+
+def main(args, stdout, stderr, api_client=None):
+    runner = ArvCwlRunner(api_client=api_client if api_client else arvados.api('v1'))
+    args.append("--leave-outputs")
+    parser = cwltool.main.arg_parser()
+    exgroup = parser.add_mutually_exclusive_group()
+    exgroup.add_argument("--enable-reuse", action="store_true",
+                        default=False, dest="enable_reuse",
+                        help="")
+    exgroup.add_argument("--disable-reuse", action="store_false",
+                        default=False, dest="enable_reuse",
+                        help="")
+
+    return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
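
@arvExecutor@ blocks on a condition variable whenever the CWL job iterator has nothing runnable, and @on_message@, invoked from the websocket thread, notifies it when a job reaches a terminal state. The handshake in isolation, stripped of the Arvados specifics:

<notextile>
<pre><code># Minimal sketch of the wait/notify pattern used by ArvCwlRunner: the
# executor thread waits on a Condition, the event thread notifies it.
import threading

cond = threading.Condition()
pending = {'job-uuid': True}  # stand-in for self.jobs

def on_job_done(uuid):
    # Called from the event thread when a job finishes.
    with cond:
        pending.pop(uuid, None)
        cond.notify()

def wait_for_all_jobs():
    # Called from the executor thread; wakes once per notification.
    with cond:
        while pending:
            cond.wait()
</code></pre>
</notextile>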
diff --git a/sdk/cwl/bin/cwl-runner b/sdk/cwl/bin/cwl-runner
new file mode 100755 (executable)
index 0000000..f31aefd
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+import sys
+
+from arvados_cwl import main
+
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
diff --git a/sdk/cwl/gittaggers.py b/sdk/cwl/gittaggers.py
new file mode 120000 (symlink)
index 0000000..d59c02c
--- /dev/null
@@ -0,0 +1 @@
+../python/gittaggers.py
\ No newline at end of file
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
new file mode 100644 (file)
index 0000000..2fd03f7
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+
+from setuptools import setup, find_packages
+
+SETUP_DIR = os.path.dirname(__file__) or '.'
+README = os.path.join(SETUP_DIR, 'README.rst')
+
+try:
+    import gittaggers
+    tagger = gittaggers.EggInfoFromGit
+except ImportError:
+    tagger = egg_info_cmd.egg_info
+
+setup(name='arvados-cwl-runner',
+      version='1.0',
+      description='Arvados Common Workflow Language runner',
+      long_description=open(README).read(),
+      author='Arvados',
+      author_email='info@arvados.org',
+      url="https://arvados.org",
+      download_url="https://github.com/curoverse/arvados.git",
+      license='Apache 2.0',
+      packages=find_packages(),
+      scripts=[
+          'bin/cwl-runner'
+      ],
+      install_requires=[
+          'cwltool',
+          'arvados-python-client'
+      ],
+      zip_safe=True,
+      cmdclass={'egg_info': tagger},
+      )
index 21e31ad8055ce27e84f303c538789da050c021f6..d676d37a8a7dfb54fbb0429a548b4e5b90cac6e4 100644 (file)
@@ -6,5 +6,10 @@ use ExtUtils::MakeMaker;
 
 WriteMakefile(
     NAME            => 'Arvados',
-    VERSION_FROM    => 'lib/Arvados.pm'
+    VERSION_FROM    => 'lib/Arvados.pm',
+    PREREQ_PM       => {
+        'JSON'     => 0,
+        'LWP'      => 0,
+        'Net::SSL' => 0,
+    },
 );
index 1d10b042729f1712a896a4d1d17c1a01d1cef29c..c5749cce5e860c5ee251f8f3bfa5616befa2c7d4 100755 (executable)
@@ -188,6 +188,13 @@ def api_for_instance(instance_name):
         abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
     return client
 
+# Check if git is available
+def check_git_availability():
+    try:
+        arvados.util.run_command(['git', '--help'])
+    except Exception:
+        abort('git command is not available. Please ensure git is installed.')
+
 # copy_pipeline_instance(pi_uuid, src, dst, args)
 #
 #    Copies a pipeline instance identified by pi_uuid from src to dst.
@@ -212,6 +219,8 @@ def copy_pipeline_instance(pi_uuid, src, dst, args):
     pi = src.pipeline_instances().get(uuid=pi_uuid).execute(num_retries=args.retries)
 
     if args.recursive:
+        check_git_availability()
+
         if not args.dst_git_repo:
             abort('--dst-git-repo is required when copying a pipeline recursively.')
         # Copy the pipeline template and save the copied template.
@@ -265,6 +274,8 @@ def copy_pipeline_template(pt_uuid, src, dst, args):
     pt = src.pipeline_templates().get(uuid=pt_uuid).execute(num_retries=args.retries)
 
     if args.recursive:
+        check_git_availability()
+
         if not args.dst_git_repo:
             abort('--dst-git-repo is required when copying a pipeline recursively.')
         # Copy input collections, docker images and git repos.
@@ -318,9 +329,9 @@ def copy_collections(obj, src, dst, args):
         obj = arvados.util.portable_data_hash_pattern.sub(copy_collection_fn, obj)
         obj = arvados.util.collection_uuid_pattern.sub(copy_collection_fn, obj)
         return obj
-    elif type(obj) == dict:
+    elif isinstance(obj, dict):
         return {v: copy_collections(obj[v], src, dst, args) for v in obj}
-    elif type(obj) == list:
+    elif isinstance(obj, list):
         return [copy_collections(v, src, dst, args) for v in obj]
     return obj
 
index 496db86ab41c71a1eec19a0c6da244ebb060b2a5..7ca6e7ca234f9b7ef2f1bc4dfc6ef84910c9c1e0 100644 (file)
@@ -167,7 +167,9 @@ def parse_arguments(arguments):
     args = arg_parser.parse_args(arguments)
 
     if len(args.paths) == 0:
-        args.paths += ['/dev/stdin']
+        args.paths = ['-']
+
+    args.paths = map(lambda x: "-" if x == "/dev/stdin" else x, args.paths)
 
     if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
         if args.filename:
@@ -182,9 +184,9 @@ def parse_arguments(arguments):
         args.progress = True
 
     if args.paths == ['-']:
-        args.paths = ['/dev/stdin']
+        args.resume = False
         if not args.filename:
-            args.filename = '-'
+            args.filename = 'stdin'
 
     return args
 
@@ -466,7 +468,16 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     writer.report_progress()
     writer.do_queued_work()  # Do work resumed from cache.
     for path in args.paths:  # Copy file data to Keep.
-        if os.path.isdir(path):
+        if path == '-':
+            writer.start_new_stream()
+            writer.start_new_file(args.filename)
+            r = sys.stdin.read(64*1024)
+            while r:
+                # Need to bypass the _queued_file check in ResumableCollectionWriter.write()
+                # and call CollectionWriter.write() directly.
+                super(arvados.collection.ResumableCollectionWriter, writer).write(r)
+                r = sys.stdin.read(64*1024)
+        elif os.path.isdir(path):
             writer.write_directory_tree(
                 path, max_manifest_depth=args.max_manifest_depth)
         else:
index f2bf0f353bbd146ece4775da7b6094d654ed0d04..be94e7304a34f3a2cdc829cd9a9f4b8339aaaf33 100644 (file)
@@ -14,11 +14,12 @@ import logging
 import arvados.commands._util as arv_cmd
 
 logger = logging.getLogger('arvados.arv-run')
+logger.setLevel(logging.INFO)
 
 arvrun_parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
 arvrun_parser.add_argument('--dry-run', action="store_true", help="Print out the pipeline that would be submitted and exit")
 arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-run-pipeline-instance")
-arvrun_parser.add_argument('--docker-image', type=str, default="arvados/jobs", help="Docker image to use, default arvados/jobs")
+arvrun_parser.add_argument('--docker-image', type=str, help="Docker image to use, otherwise use instance default.")
 arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Commands that return non-zero return codes should not be considered failed.")
 arvrun_parser.add_argument('--no-reuse', action="store_true", help="Do not reuse past jobs.")
 arvrun_parser.add_argument('--no-wait', action="store_true", help="Do not wait and display logs after submitting command, just exit.")
@@ -100,6 +101,63 @@ def statfile(prefix, fn):
 
     return prefix+fn
 
+def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
+    # Find the smallest path prefix that includes all the files that need to be uploaded.
+    # This starts at the root and iteratively removes common parent directory prefixes
+    # until the file paths no longer share a common parent.
+    n = True
+    pathprefix = "/"
+    while n:
+        pathstep = None
+        for c in files:
+            if pathstep is None:
+                sp = c.fn.split('/')
+                if len(sp) < 2:
+                    # no parent directories left
+                    n = False
+                    break
+                # path step takes next directory
+                pathstep = sp[0] + "/"
+            else:
+                # check if pathstep is common prefix for all files
+                if not c.fn.startswith(pathstep):
+                    n = False
+                    break
+        if n:
+            # pathstep is common parent directory for all files, so remove the prefix
+            # from each path
+            pathprefix += pathstep
+            for c in files:
+                c.fn = c.fn[len(pathstep):]
+
+    orgdir = os.getcwd()
+    os.chdir(pathprefix)
+
+    logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
+
+    if dry_run:
+        logger.info("$(input) is %s", pathprefix.rstrip('/'))
+        pdh = "$(input)"
+    else:
+        files = sorted(files, key=lambda x: x.fn)
+        collection = arvados.CollectionWriter(api, num_retries=num_retries)
+        stream = None
+        for f in files:
+            sp = os.path.split(f.fn)
+            if sp[0] != stream:
+                stream = sp[0]
+                collection.start_new_stream(stream)
+            collection.write_file(f.fn, sp[1])
+        item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
+        pdh = item["portable_data_hash"]
+        logger.info("Uploaded to %s", item["uuid"])
+
+    for c in files:
+        c.fn = "$(file %s/%s)" % (pdh, c.fn)
+
+    os.chdir(orgdir)
+
+
 def main(arguments=None):
     args = arvrun_parser.parse_args(arguments)
 
@@ -178,62 +236,9 @@ def main(arguments=None):
                             command[i] = statfile(m.group(1), m.group(2))
                             break
 
-    n = True
-    pathprefix = "/"
     files = [c for command in slots[1:] for c in command if isinstance(c, UploadFile)]
-    if len(files) > 0:
-        # Find the smallest path prefix that includes all the files that need to be uploaded.
-        # This starts at the root and iteratively removes common parent directory prefixes
-        # until all file pathes no longer have a common parent.
-        while n:
-            pathstep = None
-            for c in files:
-                if pathstep is None:
-                    sp = c.fn.split('/')
-                    if len(sp) < 2:
-                        # no parent directories left
-                        n = False
-                        break
-                    # path step takes next directory
-                    pathstep = sp[0] + "/"
-                else:
-                    # check if pathstep is common prefix for all files
-                    if not c.fn.startswith(pathstep):
-                        n = False
-                        break
-            if n:
-                # pathstep is common parent directory for all files, so remove the prefix
-                # from each path
-                pathprefix += pathstep
-                for c in files:
-                    c.fn = c.fn[len(pathstep):]
-
-        orgdir = os.getcwd()
-        os.chdir(pathprefix)
-
-        print("Upload local files: \"%s\"" % '" "'.join([c.fn for c in files]))
-
-        if args.dry_run:
-            print("$(input) is %s" % pathprefix.rstrip('/'))
-            pdh = "$(input)"
-        else:
-            files = sorted(files, key=lambda x: x.fn)
-            collection = arvados.CollectionWriter(api, num_retries=args.retries)
-            stream = None
-            for f in files:
-                sp = os.path.split(f.fn)
-                if sp[0] != stream:
-                    stream = sp[0]
-                    collection.start_new_stream(stream)
-                collection.write_file(f.fn, sp[1])
-            item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
-            pdh = item["portable_data_hash"]
-            print "Uploaded to %s" % item["uuid"]
-
-        for c in files:
-            c.fn = "$(file %s/%s)" % (pdh, c.fn)
-
-        os.chdir(orgdir)
+    if files:
+        uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.num_retries, project=project)
 
     for i in xrange(1, len(slots)):
         slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
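
Hoisting this logic into @uploadfiles()@ lets other callers share it; the new CWL runner's @ArvPathMapper@ above already calls @arvados.commands.run.uploadfiles()@ with its own file list. A hedged usage sketch, where @FileSpec@ is a hypothetical stand-in satisfying the objects' only requirement, a mutable @fn@ path attribute:

<notextile>
<pre><code># After uploadfiles() returns, each object's .fn has been rewritten to a
# "$(file <portable_data_hash>/<path>)" reference.
import arvados
import arvados.commands.run

class FileSpec(object):  # hypothetical stand-in for UploadFile
    def __init__(self, fn):
        self.fn = fn

api = arvados.api('v1')
files = [FileSpec('/home/me/data/reads.fastq')]  # example local path
arvados.commands.run.uploadfiles(files, api, num_retries=3)
print files[0].fn
</code></pre>
</notextile>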
@@ -244,11 +249,12 @@ def main(arguments=None):
         "repository": args.repository,
         "script_parameters": {
         },
-        "runtime_constraints": {
-            "docker_image": args.docker_image
-        }
+        "runtime_constraints": {}
     }
 
+    if args.docker_image:
+        component["runtime_constraints"]["docker_image"] = args.docker_image
+
     task_foreach = []
     group_parser = argparse.ArgumentParser()
     group_parser.add_argument('-b', '--batch-size', type=int)
@@ -298,7 +304,7 @@ def main(arguments=None):
     else:
         pipeline["owner_uuid"] = project
         pi = api.pipeline_instances().create(body=pipeline, ensure_unique_name=True).execute()
-        print "Running pipeline %s" % pi["uuid"]
+        logger.info("Running pipeline %s", pi["uuid"])
 
         if args.local:
             subprocess.call(["arv-run-pipeline-instance", "--instance", pi["uuid"], "--run-jobs-here"] + (["--no-reuse"] if args.no_reuse else []))
@@ -306,11 +312,11 @@ def main(arguments=None):
             ws.main(["--pipeline", pi["uuid"]])
 
         pi = api.pipeline_instances().get(uuid=pi["uuid"]).execute()
-        print "Pipeline is %s" % pi["state"]
+        logger.info("Pipeline is %s", pi["state"])
         if "output_uuid" in pi["components"]["command"]:
-            print "Output is %s" % pi["components"]["command"]["output_uuid"]
+            logger.info("Output is %s", pi["components"]["command"]["output_uuid"])
         else:
-            print "No output"
+            logger.info("No output")
 
 if __name__ == '__main__':
     main()
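
Routing status lines through the @arvados.arv-run@ logger sends them to stderr and lets callers adjust verbosity with the standard logging machinery, e.g.:

<notextile>
<pre><code>import logging

# Silence arv-run's informational chatter; keep warnings and errors.
logging.getLogger('arvados.arv-run').setLevel(logging.WARNING)
</code></pre>
</notextile>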
index 9bce9971076372394fdf2e5c1f9523eda9d115ec..347075dffdbb8cca1144f277b46086929ba86ab7 100644 (file)
@@ -14,6 +14,7 @@ def main(arguments=None):
     parser = argparse.ArgumentParser()
     parser.add_argument('-u', '--uuid', type=str, default="", help="Filter events on object_uuid")
     parser.add_argument('-f', '--filters', type=str, default="", help="Arvados query filter to apply to log events (JSON encoded)")
+    parser.add_argument('-s', '--start-time', type=str, default="", help="Arvados query filter to fetch log events created at or after this time. This will be server time in UTC. Allowed format: YYYY-MM-DD or YYYY-MM-DD hh:mm:ss")
 
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--poll-interval', default=15, type=int, help="If websockets is not available, specify the polling interval, default is every 15 seconds")
@@ -60,6 +61,12 @@ def main(arguments=None):
     if args.pipeline:
         filters += [ ['object_uuid', '=', args.pipeline] ]
 
+    if args.start_time:
+        last_log_id = 1
+        filters += [ ['created_at', '>=', args.start_time] ]
+    else:
+        last_log_id = None
+
     def on_message(ev):
         global filters
         global ws
@@ -85,7 +92,7 @@ def main(arguments=None):
             print json.dumps(ev)
 
     try:
-        ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval)
+        ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval, last_log_id=last_log_id)
         if ws:
             if args.pipeline:
                 c = api.pipeline_instances().get(uuid=args.pipeline).execute()
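
For example, @arv-ws -s '2015-08-03 16:00:00'@ adds a server-side filter and seeds the subscription's starting log id so no events are skipped:

<notextile>
<pre><code># Equivalent filter and starting point built by the code above:
filters = [['created_at', '>=', '2015-08-03 16:00:00']]
last_log_id = 1  # replay from the start of the log table
</code></pre>
</notextile>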
index 3036a25fe092260d8da0a52be90380a570f9bc46..df5b3e7dee514b8aea45edaa73bcd47042d8fab3 100644 (file)
@@ -14,7 +14,7 @@ from ws4py.client.threadedclient import WebSocketClient
 _logger = logging.getLogger('arvados.events')
 
 class EventClient(WebSocketClient):
-    def __init__(self, url, filters, on_event):
+    def __init__(self, url, filters, on_event, last_log_id):
         ssl_options = {'ca_certs': arvados.util.ca_certs_path()}
         if config.flag_is_true('ARVADOS_API_HOST_INSECURE'):
             ssl_options['cert_reqs'] = ssl.CERT_NONE
@@ -28,19 +28,33 @@ class EventClient(WebSocketClient):
         super(EventClient, self).__init__(url, ssl_options=ssl_options)
         self.filters = filters
         self.on_event = on_event
+        self.stop = threading.Event()
+        self.last_log_id = last_log_id
 
     def opened(self):
-        self.subscribe(self.filters)
+        self.subscribe(self.filters, self.last_log_id)
 
     def received_message(self, m):
         self.on_event(json.loads(str(m)))
 
-    def close_connection(self):
-        try:
-            self.sock.shutdown(socket.SHUT_RDWR)
-            self.sock.close()
-        except:
-            pass
+    def closed(self, code, reason=None):
+        self.stop.set()
+
+    def close(self, code=1000, reason=''):
+        """Close event client and wait for it to finish."""
+
+        # The parent close() method sends an asynchronous "closed" event to the server.
+        super(EventClient, self).close(code, reason)
+
+        # If the server doesn't respond by finishing the close handshake, we'd
+        # be stuck in limbo forever.  We don't need to wait for its response
+        # before going ahead and actually closing the socket.
+        self.close_connection()
+
+        # Wait for the websocket thread to finish up (closed() is called by
+        # the websocket thread as part of terminate()).
+        while not self.stop.is_set():
+            self.stop.wait(1)
 
     def subscribe(self, filters, last_log_id=None):
         m = {"method": "subscribe", "filters": filters}
@@ -52,7 +66,7 @@ class EventClient(WebSocketClient):
         self.send(json.dumps({"method": "unsubscribe", "filters": filters}))
 
 class PollClient(threading.Thread):
-    def __init__(self, api, filters, on_event, poll_time):
+    def __init__(self, api, filters, on_event, poll_time, last_log_id):
         super(PollClient, self).__init__()
         self.api = api
         if filters:
@@ -63,27 +77,35 @@ class PollClient(threading.Thread):
         self.poll_time = poll_time
         self.daemon = True
         self.stop = threading.Event()
+        self.last_log_id = last_log_id
 
     def run(self):
         self.id = 0
-        for f in self.filters:
-            items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
-            if items:
-                if items[0]['id'] > self.id:
-                    self.id = items[0]['id']
+        if self.last_log_id is not None:
+            self.id = self.last_log_id
+        else:
+            for f in self.filters:
+                items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
+                if items:
+                    if items[0]['id'] > self.id:
+                        self.id = items[0]['id']
 
         self.on_event({'status': 200})
 
         while not self.stop.isSet():
             max_id = self.id
+            moreitems = False
             for f in self.filters:
-                items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()['items']
-                for i in items:
+                items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()
+                for i in items["items"]:
                     if i['id'] > max_id:
                         max_id = i['id']
                     self.on_event(i)
+                if items["items_available"] > len(items["items"]):
+                    moreitems = True
             self.id = max_id
-            self.stop.wait(self.poll_time)
+            if not moreitems:
+                self.stop.wait(self.poll_time)
 
     def run_forever(self):
         # Have to poll here, otherwise KeyboardInterrupt will never get processed.
@@ -91,6 +113,8 @@ class PollClient(threading.Thread):
             self.stop.wait(1)
 
     def close(self):
+        """Close poll client and wait for it to finish."""
+
         self.stop.set()
         try:
             self.join()
@@ -109,23 +133,28 @@ class PollClient(threading.Thread):
         del self.filters[self.filters.index(filters)]
 
 
-def _subscribe_websocket(api, filters, on_event):
+def _subscribe_websocket(api, filters, on_event, last_log_id=None):
     endpoint = api._rootDesc.get('websocketUrl', None)
     if not endpoint:
         raise errors.FeatureNotEnabledError(
             "Server does not advertise a websocket endpoint")
-    uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
-    client = EventClient(uri_with_token, filters, on_event)
-    ok = False
     try:
-        client.connect()
-        ok = True
-        return client
-    finally:
-        if not ok:
-            client.close_connection()
-
-def subscribe(api, filters, on_event, poll_fallback=15):
+        uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
+        client = EventClient(uri_with_token, filters, on_event, last_log_id)
+        ok = False
+        try:
+            client.connect()
+            ok = True
+            return client
+        finally:
+            if not ok:
+                client.close_connection()
+    except:
+        _logger.warn("Failed to connect to websockets on %s" % endpoint)
+        raise
+
+
+def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
     """
     :api:
       a client object retrieved from arvados.api(). The caller should not use this client object for anything else after calling subscribe().
@@ -135,15 +164,17 @@ def subscribe(api, filters, on_event, poll_fallback=15):
       The callback when a message is received.
     :poll_fallback:
       If websockets are not available, fall back to polling every N seconds.  If poll_fallback=False, this will return None if websockets are not available.
+    :last_log_id:
+      Resume the event stream from the row after this log id; only rows with a greater id are delivered.
     """
 
     if not poll_fallback:
-        return _subscribe_websocket(api, filters, on_event)
+        return _subscribe_websocket(api, filters, on_event, last_log_id)
 
     try:
-        return _subscribe_websocket(api, filters, on_event)
+        return _subscribe_websocket(api, filters, on_event, last_log_id)
     except Exception as e:
         _logger.warn("Falling back to polling after websocket error: %s" % e)
-    p = PollClient(api, filters, on_event, poll_fallback)
+    p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
     p.start()
     return p
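
With the new keyword argument, a caller can resume an event stream from a known log row instead of from "now". A minimal sketch, assuming a reachable API server and valid credentials in the environment:

<notextile>
<pre><code># Resume event delivery from log id 12345 onward, falling back to
# 15-second polling when websockets are unavailable.
import arvados
import arvados.events

def on_message(ev):
    print ev

ws = arvados.events.subscribe(arvados.api('v1'), [], on_message,
                              poll_fallback=15, last_log_id=12345)
ws.run_forever()
</code></pre>
</notextile>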
index aaf20945595e8b5e32fe74520465a761fd539e5e..3e59bfb861cbb3e135990f8ba3aa186070a0a5bc 100644 (file)
@@ -45,7 +45,7 @@ def run_command(execargs, **kwargs):
     p = subprocess.Popen(execargs, **kwargs)
     stdoutdata, stderrdata = p.communicate(None)
     if p.returncode != 0:
-        raise errors.CommandFailedError(
+        raise arvados.errors.CommandFailedError(
             "run_command %s exit %d:\n%s" %
             (execargs, p.returncode, stderrdata))
     return stdoutdata, stderrdata
@@ -110,7 +110,7 @@ def tarball_extract(tarball, path):
             elif re.search('\.tar$', f.name()):
                 p = tar_extractor(path, '')
             else:
-                raise errors.AssertionError(
+                raise arvados.errors.AssertionError(
                     "tarball_extract cannot handle filename %s" % f.name())
             while True:
                 buf = f.read(2**20)
@@ -121,7 +121,7 @@ def tarball_extract(tarball, path):
             p.wait()
             if p.returncode != 0:
                 lockfile.close()
-                raise errors.CommandFailedError(
+                raise arvados.errors.CommandFailedError(
                     "tar exited %d" % p.returncode)
         os.symlink(tarball, os.path.join(path, '.locator'))
     tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
@@ -165,7 +165,7 @@ def zipball_extract(zipball, path):
 
         for f in CollectionReader(zipball).all_files():
             if not re.search('\.zip$', f.name()):
-                raise errors.NotImplementedError(
+                raise arvados.errors.NotImplementedError(
                     "zipball_extract cannot handle filename %s" % f.name())
             zip_filename = os.path.join(path, os.path.basename(f.name()))
             zip_file = open(zip_filename, 'wb')
@@ -186,7 +186,7 @@ def zipball_extract(zipball, path):
             p.wait()
             if p.returncode != 0:
                 lockfile.close()
-                raise errors.CommandFailedError(
+                raise arvados.errors.CommandFailedError(
                     "unzip exited %d" % p.returncode)
             os.unlink(zip_filename)
         os.symlink(zipball, os.path.join(path, '.locator'))
@@ -250,7 +250,7 @@ def collection_extract(collection, path, files=[], decompress=True):
                     outfile.write(buf)
                 outfile.close()
     if len(files_got) < len(files):
-        raise errors.AssertionError(
+        raise arvados.errors.AssertionError(
             "Wanted files %s but only got %s from %s" %
             (files, files_got,
              [z.name() for z in CollectionReader(collection).all_files()]))
@@ -305,7 +305,7 @@ def stream_extract(stream, path, files=[], decompress=True):
                 outfile.write(buf)
             outfile.close()
     if len(files_got) < len(files):
-        raise errors.AssertionError(
+        raise arvados.errors.AssertionError(
             "Wanted files %s but only got %s from %s" %
             (files, files_got, [z.name() for z in stream.all_files()]))
     lockfile.close()
@@ -352,8 +352,8 @@ def is_hex(s, *length_args):
     """
     num_length_args = len(length_args)
     if num_length_args > 2:
-        raise errors.ArgumentError("is_hex accepts up to 3 arguments ({} given)"
-                                   .format(1 + num_length_args))
+        raise arvados.errors.ArgumentError(
+            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
     elif num_length_args == 2:
         good_len = (length_args[0] <= len(s) <= length_args[1])
     elif num_length_args == 1:
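Read literally, the hunk above gives is_hex three calling forms: no length arguments, an exact length, or an inclusive range. A small sketch of those forms, assuming arvados.util.is_hex is the function being patched here:

    import arvados.util

    assert arvados.util.is_hex('deadbeef')         # any length of hex
    assert arvados.util.is_hex('deadbeef', 8)      # exactly 8 characters
    assert arvados.util.is_hex('deadbeef', 4, 16)  # length between 4 and 16
    assert not arvados.util.is_hex('xyz')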
index 2451416dae38da1932f22fb7c6599b8a82e55110..60d4bec3b95c429643d7df4a600f72754954809a 100755 (executable)
@@ -24,10 +24,9 @@ parser.add_argument('locator', type=str,
                     help="""
 Collection locator, optionally with a file path or prefix.
 """)
-parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
+parser.add_argument('destination', type=str, nargs='?', default='-',
                     help="""
-Local file or directory where the data is to be written. Default:
-/dev/stdout.
+Local file or directory where the data is to be written. Default: stdout.
 """)
 group = parser.add_mutually_exclusive_group()
 group.add_argument('--progress', action='store_true',
@@ -74,7 +73,7 @@ group.add_argument('-f', action='store_true',
                    help="""
 Overwrite existing files while writing. The default behavior is to
 refuse to write *anything* if any of the output files already
-exist. As a special case, -f is not needed to write to /dev/stdout.
+exist. As a special case, -f is not needed to write to stdout.
 """)
 group.add_argument('--skip-existing', action='store_true',
                    help="""
@@ -100,9 +99,10 @@ if not args.r and (os.path.isdir(args.destination) or
     logger.debug("Appended source file name to destination directory: %s",
                  args.destination)
 
-if args.destination == '-':
-    args.destination = '/dev/stdout'
 if args.destination == '/dev/stdout':
+    args.destination = "-"
+
+if args.destination == '-':
     # Normally you have to use -f to write to a file (or device) that
     # already exists, but "-" and "/dev/stdout" are common enough to
     # merit a special exception.
@@ -115,7 +115,7 @@ else:
 # that isn't a tty.
 if (not (args.batch_progress or args.no_progress)
     and sys.stderr.isatty()
-    and (args.destination != '/dev/stdout'
+    and (args.destination != '-'
          or not sys.stdout.isatty())):
     args.progress = True
 
@@ -134,9 +134,12 @@ if not get_prefix:
         if not args.f:
             open_flags |= os.O_EXCL
         try:
-            out_fd = os.open(args.destination, open_flags)
-            with os.fdopen(out_fd, 'wb') as out_file:
-                out_file.write(reader.manifest_text())
+            if args.destination == "-":
+                sys.stdout.write(reader.manifest_text())
+            else:
+                out_fd = os.open(args.destination, open_flags)
+                with os.fdopen(out_fd, 'wb') as out_file:
+                    out_file.write(reader.manifest_text())
         except (IOError, OSError) as error:
             abort("can't write to '{}': {}".format(args.destination, error))
         except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
@@ -156,12 +159,15 @@ try:
                 if 0 != string.find(os.path.join(s.name(), f.name()),
                                     '.' + get_prefix):
                     continue
-                dest_path = os.path.join(
-                    args.destination,
-                    os.path.join(s.name(), f.name())[len(get_prefix)+1:])
-                if (not (args.n or args.f or args.skip_existing) and
-                    os.path.exists(dest_path)):
-                    abort('Local file %s already exists.' % (dest_path,))
+                if args.destination == "-":
+                    dest_path = "-"
+                else:
+                    dest_path = os.path.join(
+                        args.destination,
+                        os.path.join(s.name(), f.name())[len(get_prefix)+1:])
+                    if (not (args.n or args.f or args.skip_existing) and
+                        os.path.exists(dest_path)):
+                        abort('Local file %s already exists.' % (dest_path,))
             else:
                 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
                     continue
@@ -178,20 +184,23 @@ for s,f,outfilename in todo:
     outfile = None
     digestor = None
     if not args.n:
-        if args.skip_existing and os.path.exists(outfilename):
-            logger.debug('Local file %s exists. Skipping.', outfilename)
-            continue
-        elif not args.f and (os.path.isfile(outfilename) or
-                           os.path.isdir(outfilename)):
-            # Good thing we looked again: apparently this file wasn't
-            # here yet when we checked earlier.
-            abort('Local file %s already exists.' % (outfilename,))
-        if args.r:
-            arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
-        try:
-            outfile = open(outfilename, 'wb')
-        except Exception as error:
-            abort('Open(%s) failed: %s' % (outfilename, error))
+        if outfilename == "-":
+            outfile = sys.stdout
+        else:
+            if args.skip_existing and os.path.exists(outfilename):
+                logger.debug('Local file %s exists. Skipping.', outfilename)
+                continue
+            elif not args.f and (os.path.isfile(outfilename) or
+                               os.path.isdir(outfilename)):
+                # Good thing we looked again: apparently this file wasn't
+                # here yet when we checked earlier.
+                abort('Local file %s already exists.' % (outfilename,))
+            if args.r:
+                arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+            try:
+                outfile = open(outfilename, 'wb')
+            except Exception as error:
+                abort('Open(%s) failed: %s' % (outfilename, error))
     if args.hash:
         digestor = hashlib.new(args.hash)
     try:
@@ -216,8 +225,8 @@ for s,f,outfilename in todo:
             sys.stderr.write("%s  %s/%s\n"
                              % (digestor.hexdigest(), s.name(), f.name()))
     except KeyboardInterrupt:
-        if outfile and outfilename != '/dev/stdout':
-            os.unlink(outfilename)
+        if outfile and (outfile.fileno() > 2) and not outfile.closed:
+            os.unlink(outfile.name)
         break
 
 if args.progress:
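The hunks above make '-' the canonical spelling for stdout: '/dev/stdout' is folded into '-' early, and every later branch tests only for '-'. A minimal sketch of that normalization; the function name is chosen here for illustration:

    import sys

    def open_destination(dest):
        # Fold the Linux-specific device path into the portable '-' form,
        # then special-case it once instead of in every branch.
        if dest == '/dev/stdout':
            dest = '-'
        if dest == '-':
            return sys.stdout
        return open(dest, 'wb')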
index 02f65e6caf47470928fbbe7fcbee2cafb490e817..1c5162b97d87e476b9ff3badc82ffce3afdd4d28 100644 (file)
@@ -115,6 +115,33 @@ def find_available_port():
     sock.close()
     return port
 
+def _wait_until_port_listens(port, timeout=10):
+    """Wait for a process to start listening on the given port.
+
+    If nothing listens on the port within the specified timeout (given
+    in seconds), print a warning on stderr before returning.
+    """
+    try:
+        subprocess.check_output(['which', 'lsof'])
+    except subprocess.CalledProcessError:
+        print("WARNING: No `lsof` -- cannot wait for port to listen. "+
+              "Sleeping 0.5 and hoping for the best.")
+        time.sleep(0.5)
+        return
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        try:
+            subprocess.check_output(
+                ['lsof', '-t', '-i', 'tcp:'+str(port)])
+        except subprocess.CalledProcessError:
+            time.sleep(0.1)
+            continue
+        return
+    print(
+        "WARNING: Nothing is listening on port {} (waited {} seconds)."
+        .format(port, timeout),
+        file=sys.stderr)
+
 def run(leave_running_atexit=False):
     """Ensure an API server is running, and ARVADOS_API_* env vars have
     admin credentials for it.
@@ -224,8 +251,10 @@ def run(leave_running_atexit=False):
     my_api_host = match.group(1)
     os.environ['ARVADOS_API_HOST'] = my_api_host
 
-    # Make sure the server has written its pid file before continuing
+    # Make sure the server has written its pid file and started
+    # listening on its TCP port
     find_server_pid(pid_file)
+    _wait_until_port_listens(port)
 
     reset()
     os.chdir(restore_cwd)
@@ -289,6 +318,8 @@ def _start_keep(n, keep_args):
     with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'w') as f:
         f.write(keep0)
 
+    _wait_until_port_listens(port)
+
     return port
 
 def run_keep(blob_signing_key=None, enforce_permissions=False):
@@ -369,6 +400,7 @@ def run_keep_proxy():
     }}).execute()
     os.environ["ARVADOS_KEEP_PROXY"] = "http://localhost:{}".format(port)
     _setport('keepproxy', port)
+    _wait_until_port_listens(port)
 
 def stop_keep_proxy():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
@@ -392,6 +424,7 @@ def run_arv_git_httpd():
     with open(_pidfile('arv-git-httpd'), 'w') as f:
         f.write(str(agh.pid))
     _setport('arv-git-httpd', gitport)
+    _wait_until_port_listens(gitport)
 
 def stop_arv_git_httpd():
     if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
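_wait_until_port_listens polls lsof. A hedged, stdlib-only variant of the same wait-for-port idea, assuming the service binds to localhost, would poll with TCP connection attempts instead of shelling out:

    import socket
    import time

    def wait_until_port_listens(port, timeout=10):
        deadline = time.time() + timeout
        while time.time() < deadline:
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.connect(('127.0.0.1', port))
                return True       # something accepted the connection
            except socket.error:
                time.sleep(0.1)   # nothing listening yet; retry
            finally:
                s.close()
        return False              # timed out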
diff --git a/sdk/python/tests/test_arv_ws.py b/sdk/python/tests/test_arv_ws.py
new file mode 100644 (file)
index 0000000..5a01827
--- /dev/null
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import unittest
+import arvados.errors as arv_error
+import arvados.commands.ws as arv_ws
+
+class ArvWsTestCase(unittest.TestCase):
+    def run_ws(self, args):
+        return arv_ws.main(args)
+
+    def test_unsupported_arg(self):
+        with self.assertRaises(SystemExit):
+            self.run_ws(['-x=unknown'])
index f9e5d8ce40b2a462f10ec7cf710e146e0b795d0b..41739a92bd9396bf73c7562ce8db25dfb0e66750 100644 (file)
@@ -1,6 +1,8 @@
-import unittest
 import os
-import arvados.util
+import subprocess
+import unittest
+
+import arvados
 
 class MkdirDashPTest(unittest.TestCase):
     def setUp(self):
@@ -20,3 +22,15 @@ class MkdirDashPTest(unittest.TestCase):
         with open('./tmp/bar', 'wb') as f:
             f.write('bar')
         self.assertRaises(OSError, arvados.util.mkdir_dash_p, './tmp/bar')
+
+
+class RunCommandTestCase(unittest.TestCase):
+    def test_success(self):
+        stdout, stderr = arvados.util.run_command(['echo', 'test'],
+                                                  stderr=subprocess.PIPE)
+        self.assertEqual("test\n", stdout)
+        self.assertEqual("", stderr)
+
+    def test_failure(self):
+        with self.assertRaises(arvados.errors.CommandFailedError):
+            arvados.util.run_command(['false'])
index d879ebe1f8062c02d965bd9c845e5e00c57d1e76..ffa310baca558ab17cd3c5f245b5010c72d898b9 100644 (file)
@@ -5,6 +5,8 @@ import arvados
 import arvados.events
 import mock
 import threading
+from datetime import datetime, timedelta
+import time
 
 class WebsocketTest(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
@@ -17,17 +19,43 @@ class WebsocketTest(run_test_server.TestCaseWithServers):
             self.ws.close()
         super(WebsocketTest, self).tearDown()
 
-    def _test_subscribe(self, poll_fallback, expect_type):
+    def _test_subscribe(self, poll_fallback, expect_type, last_log_id=None, additional_filters=None, expected=1):
         run_test_server.authorize_with('active')
-        events = Queue.Queue(3)
+        events = Queue.Queue(100)
+
+        # Create an "ancestor" object before subscribing. When
+        # subscribing with a last_log_id in the past, its creation
+        # event should also be retrieved; when last_log_id is omitted,
+        # it should not be.
+        ancestor = arvados.api('v1').humans().create(body={}).execute()
+        time.sleep(5)
+
+        filters = [['object_uuid', 'is_a', 'arvados#human']]
+        if additional_filters:
+            filters = filters + additional_filters
+
         self.ws = arvados.events.subscribe(
-            arvados.api('v1'), [['object_uuid', 'is_a', 'arvados#human']],
-            events.put, poll_fallback=poll_fallback)
+            arvados.api('v1'), filters,
+            events.put, poll_fallback=poll_fallback, last_log_id=last_log_id)
         self.assertIsInstance(self.ws, expect_type)
-        self.assertEqual(200, events.get(True, 10)['status'])
+        self.assertEqual(200, events.get(True, 5)['status'])
         human = arvados.api('v1').humans().create(body={}).execute()
-        self.assertEqual(human['uuid'], events.get(True, 10)['object_uuid'])
-        self.assertTrue(events.empty(), "got more events than expected")
+
+        if last_log_id is None or expected == 0:
+            self.assertEqual(human['uuid'], events.get(True, 5)['object_uuid'])
+            self.assertTrue(events.empty(), "got more events than expected")
+        else:
+            log_events = []
+            for i in range(0, 20):
+                try:
+                    event = events.get(True, 5)
+                    self.assertIsNotNone(event['object_uuid'])
+                    log_events.append(event['object_uuid'])
+                except Queue.Empty:
+                    break
+
+            self.assertGreater(len(log_events), 1)
+            self.assertIn(human['uuid'], log_events)
+            self.assertIn(ancestor['uuid'], log_events)
 
     def test_subscribe_websocket(self):
         self._test_subscribe(
@@ -38,3 +66,63 @@ class WebsocketTest(run_test_server.TestCaseWithServers):
         event_client_constr.side_effect = Exception('All is well')
         self._test_subscribe(
             poll_fallback=1, expect_type=arvados.events.PollClient)
+
+    def test_subscribe_websocket_with_start_time_date_only(self):
+        lastHour = datetime.today() - timedelta(hours = 1)
+        self._test_subscribe(
+            poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+                additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d')]])
+
+    @mock.patch('arvados.events.EventClient.__init__')
+    def test_poll_with_start_time_date_only(self, event_client_constr):
+        event_client_constr.side_effect = Exception('All is well')
+        lastHour = datetime.today() - timedelta(hours = 1)
+        self._test_subscribe(
+            poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+                additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d')]])
+
+    def test_subscribe_websocket_with_start_time_last_hour(self):
+        lastHour = datetime.today() - timedelta(hours = 1)
+        self._test_subscribe(
+            poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+                additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d %H:%M:%S')]])
+
+    @mock.patch('arvados.events.EventClient.__init__')
+    def test_subscribe_poll_with_start_time_last_hour(self, event_client_constr):
+        event_client_constr.side_effect = Exception('All is well')
+        lastHour = datetime.today() - timedelta(hours = 1)
+        self._test_subscribe(
+            poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+                additional_filters=[['created_at', '>=', lastHour.strftime('%Y-%m-%d %H:%M:%S')]])
+
+    def test_subscribe_websocket_with_start_time_next_hour(self):
+        nextHour = datetime.today() + timedelta(hours = 1)
+        with self.assertRaises(Queue.Empty):
+            self._test_subscribe(
+                poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+                    additional_filters=[['created_at', '>=', nextHour.strftime('%Y-%m-%d %H:%M:%S')]], expected=0)
+
+    @mock.patch('arvados.events.EventClient.__init__')
+    def test_subscribe_poll_with_start_time_next_hour(self, event_client_constr):
+        event_client_constr.side_effect = Exception('All is well')
+        nextHour = datetime.today() + timedelta(hours = 1)
+        with self.assertRaises(Queue.Empty):
+            self._test_subscribe(
+                poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+                    additional_filters=[['created_at', '>=', nextHour.strftime('%Y-%m-%d %H:%M:%S')]], expected=0)
+
+    def test_subscribe_websocket_with_start_time_tomorrow(self):
+        tomorrow = datetime.today() + timedelta(hours = 24)
+        with self.assertRaises(Queue.Empty):
+            self._test_subscribe(
+                poll_fallback=False, expect_type=arvados.events.EventClient, last_log_id=1,
+                    additional_filters=[['created_at', '>=', tomorrow.strftime('%Y-%m-%d')]], expected=0)
+
+    @mock.patch('arvados.events.EventClient.__init__')
+    def test_subscribe_poll_with_start_time_tomorrow(self, event_client_constr):
+        event_client_constr.side_effect = Exception('All is well')
+        tomorrow = datetime.today() + timedelta(hours = 24)
+        with self.assertRaises(Queue.Empty):
+            self._test_subscribe(
+                poll_fallback=1, expect_type=arvados.events.PollClient, last_log_id=1,
+                    additional_filters=[['created_at', '>=', tomorrow.strftime('%Y-%m-%d')]], expected=0)
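The tests above combine last_log_id with a created_at filter so only part of the backlog is replayed. A short sketch of building such a filter list, assuming the server compares timestamps given in these string formats:

    from datetime import datetime, timedelta

    last_hour = datetime.today() - timedelta(hours=1)
    filters = [
        ['object_uuid', 'is_a', 'arvados#human'],
        ['created_at', '>=', last_hour.strftime('%Y-%m-%d %H:%M:%S')],
    ]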
index fd6ab582071cc65f27ad0c943ad2b2d75ff03f6b..4bf9a6a0945462e2bf74596d620ec21575541844 100644 (file)
@@ -2,15 +2,27 @@ class Arvados::V1::RepositoriesController < ApplicationController
   skip_before_filter :find_object_by_uuid, :only => :get_all_permissions
   skip_before_filter :render_404_if_no_object, :only => :get_all_permissions
   before_filter :admin_required, :only => :get_all_permissions
+
   def get_all_permissions
-    @users = {}
-    User.includes(:authorized_keys).find_each do |u|
-      @users[u.uuid] = u
+    # users is a map of {user_uuid => User object}
+    users = {}
+    # user_aks is a map of {user_uuid => array of public keys}
+    user_aks = {}
+    # admins is an array of user_uuids
+    admins = []
+    User.eager_load(:authorized_keys).find_each do |u|
+      next unless u.is_active or u.uuid == anonymous_user_uuid
+      users[u.uuid] = u
+      user_aks[u.uuid] = u.authorized_keys.collect do |ak|
+        {
+          public_key: ak.public_key,
+          authorized_key_uuid: ak.uuid
+        }
+      end
+      admins << u.uuid if u.is_admin
     end
-    admins = @users.select { |k,v| v.is_admin }
-    @user_aks = {}
     @repo_info = {}
-    Repository.includes(:permissions).find_each do |repo|
+    Repository.eager_load(:permissions).find_each do |repo|
       @repo_info[repo.uuid] = {
         uuid: repo.uuid,
         name: repo.name,
@@ -18,60 +30,92 @@ class Arvados::V1::RepositoriesController < ApplicationController
         fetch_url: repo.fetch_url,
         user_permissions: {},
       }
-      gitolite_permissions = ''
-      perms = []
+      # evidence is an array of {name: 'can_xxx', user_uuid: 'x-y-z'},
+      # one entry for each piece of evidence we find in the permission
+      # database that establishes that a user can access this
+      # repository. Multiple entries can be added for a given user,
+      # possibly with different access levels; these will be compacted
+      # below.
+      evidence = []
       repo.permissions.each do |perm|
         if ArvadosModel::resource_class_for_uuid(perm.tail_uuid) == Group
-          @users.each do |user_uuid, user|
-            user.group_permissions.each do |group_uuid, perm_mask|
-              if perm_mask[:manage]
-                perms << {name: 'can_manage', user_uuid: user_uuid}
-              elsif perm_mask[:write]
-                perms << {name: 'can_write', user_uuid: user_uuid}
-              elsif perm_mask[:read]
-                perms << {name: 'can_read', user_uuid: user_uuid}
-              end
+          # A group has permission. Each user who has access to this
+          # group also has access to the repository. Access level is
+          # min(group-to-repo permission, user-to-group permission).
+          users.each do |user_uuid, user|
+            perm_mask = user.group_permissions[perm.tail_uuid]
+            if not perm_mask
+              next
+            elsif perm_mask[:manage] and perm.name == 'can_manage'
+              evidence << {name: 'can_manage', user_uuid: user_uuid}
+            elsif perm_mask[:write] and ['can_manage', 'can_write'].index perm.name
+              evidence << {name: 'can_write', user_uuid: user_uuid}
+            elsif perm_mask[:read]
+              evidence << {name: 'can_read', user_uuid: user_uuid}
             end
           end
-        else
-          perms << {name: perm.name, user_uuid: perm.tail_uuid}
+        elsif users[perm.tail_uuid]
+          # A user has permission; the user exists; and either the
+          # user is active, or it's the special case of the anonymous
+          # user which is never "active" but is allowed to read
+          # content from public repositories.
+          evidence << {name: perm.name, user_uuid: perm.tail_uuid}
         end
       end
-      # Owner of the repository, and all admins, can RW
-      ([repo.owner_uuid] + admins.keys).each do |user_uuid|
-        perms << {name: 'can_write', user_uuid: user_uuid}
+      # Owner of the repository, and all admins, can do everything.
+      ([repo.owner_uuid] | admins).each do |user_uuid|
+        # Except: no permissions for inactive users, even if they own
+        # repositories.
+        next unless users[user_uuid]
+        evidence << {name: 'can_manage', user_uuid: user_uuid}
       end
-      perms.each do |perm|
+      # Distill all the evidence about permissions on this repository
+      # into one hash per user, of the form {'can_xxx' => true, ...}.
+      # The hash is nil for a user who has no permissions at all on
+      # this particular repository.
+      evidence.each do |perm|
         user_uuid = perm[:user_uuid]
-        @user_aks[user_uuid] = @users[user_uuid].andand.authorized_keys.andand.
-          collect do |ak|
-          {
-            public_key: ak.public_key,
-            authorized_key_uuid: ak.uuid
-          }
-        end || []
-        if @user_aks[user_uuid].any?
-          ri = (@repo_info[repo.uuid][:user_permissions][user_uuid] ||= {})
-          ri[perm[:name]] = true
-        end
+        user_perms = (@repo_info[repo.uuid][:user_permissions][user_uuid] ||= {})
+        user_perms[perm[:name]] = true
       end
     end
-    @repo_info.values.each do |repo_users|
-      repo_users[:user_permissions].each do |user_uuid,perms|
-        if perms['can_manage']
-          perms[:gitolite_permissions] = 'RW'
-          perms['can_write'] = true
-          perms['can_read'] = true
-        elsif perms['can_write']
-          perms[:gitolite_permissions] = 'RW'
-          perms['can_read'] = true
-        elsif perms['can_read']
-          perms[:gitolite_permissions] = 'R'
+    # Revisit each {'can_xxx' => true, ...} hash for some final
+    # cleanup to make life easier for the requestor.
+    #
+    # Add a 'gitolite_permissions' key alongside the 'can_xxx' keys,
+    # for the convenience of the gitolite config file generator.
+    #
+    # Add all lesser permissions when a greater permission is
+    # present. If the requestor only wants to know who can write, it
+    # only has to test for 'can_write' in the response.
+    @repo_info.values.each do |repo|
+      repo[:user_permissions].each do |user_uuid, user_perms|
+        if user_perms['can_manage']
+          user_perms['gitolite_permissions'] = 'RW'
+          user_perms['can_write'] = true
+          user_perms['can_read'] = true
+        elsif user_perms['can_write']
+          user_perms['gitolite_permissions'] = 'RW'
+          user_perms['can_read'] = true
+        elsif user_perms['can_read']
+          user_perms['gitolite_permissions'] = 'R'
         end
       end
     end
+    # The response looks like
+    #   {"kind":"...",
+    #    "repositories":[r1,r2,r3,...],
+    #    "user_keys":usermap}
+    # where each of r1,r2,r3 looks like
+    #   {"uuid":"repo-uuid-1",
+    #    "name":"username/reponame",
+    #    "push_url":"...",
+    #    "user_permissions":{"user-uuid-a":{"can_read":true,"gitolite_permissions":"R"}}}
+    # and usermap looks like
+    #   {"user-uuid-a":[{"public_key":"ssh-rsa g...","authorized_key_uuid":"ak-uuid-g"},...],
+    #    "user-uuid-b":[{"public_key":"ssh-rsa h...","authorized_key_uuid":"ak-uuid-h"},...],...}
     send_json(kind: 'arvados#RepositoryPermissionSnapshot',
               repositories: @repo_info.values,
-              user_keys: @user_aks)
+              user_keys: user_aks)
   end
 end
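Because the controller now fills in all lesser permissions whenever a greater one is present, a requestor only has to test a single key. A hedged sketch of a client walking the documented response shape, assuming the endpoint is reachable through the Python SDK as repositories().get_all_permissions():

    import arvados

    resp = arvados.api('v1').repositories().get_all_permissions().execute()
    for repo in resp['repositories']:
        for user_uuid, perms in repo['user_permissions'].items():
            # 'can_write' is present for writers and managers alike.
            if perms.get('can_write'):
                print('%s -> %s (%s)' % (
                    user_uuid, repo['name'], perms['gitolite_permissions']))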
index 519178b0bbcf319426dbca39d181be58ef800420..84251db470fff95bbbc23f27397263124464c9d3 100644 (file)
@@ -9,32 +9,40 @@ class Arvados::V1::VirtualMachinesController < ApplicationController
   end
 
   def get_all_logins
-    @users = {}
-    User.includes(:authorized_keys).all.each do |u|
-      @users[u.uuid] = u
-    end
     @response = []
-    @vms = VirtualMachine.includes(:login_permissions)
+    @vms = VirtualMachine.eager_load :login_permissions
     if @object
-      @vms = @vms.where('uuid=?', @object.uuid)
+      @vms = @vms.where uuid: @object.uuid
     else
       @vms = @vms.all
     end
+    @users = {}
+    User.eager_load(:authorized_keys).
+      where('users.uuid in (?)',
+            @vms.map { |vm| vm.login_permissions.map &:tail_uuid }.flatten.uniq).
+      each do |u|
+      @users[u.uuid] = u
+    end
     @vms.each do |vm|
       vm.login_permissions.each do |perm|
         user_uuid = perm.tail_uuid
-        @users[user_uuid].andand.authorized_keys.andand.each do |ak|
-          unless perm.properties['username'].blank?
-            @response << {
-              username: perm.properties['username'],
-              hostname: vm.hostname,
-              groups: (perm.properties["groups"].to_a rescue []),
-              public_key: ak.public_key,
-              user_uuid: user_uuid,
-              virtual_machine_uuid: vm.uuid,
-              authorized_key_uuid: ak.uuid
-            }
-          end
+        next if not @users[user_uuid]
+        next if perm.properties['username'].blank?
+        aks = @users[user_uuid].authorized_keys
+        if aks.empty?
+          # We'll emit one entry, with no public key.
+          aks = [nil]
+        end
+        aks.each do |ak|
+          @response << {
+            username: perm.properties['username'],
+            hostname: vm.hostname,
+            groups: (perm.properties['groups'].to_a rescue []),
+            public_key: ak ? ak.public_key : nil,
+            user_uuid: user_uuid,
+            virtual_machine_uuid: vm.uuid,
+            authorized_key_uuid: ak ? ak.uuid : nil,
+          }
         end
       end
     end
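Since the controller now emits one row per login even when a user has no authorized keys (with public_key and authorized_key_uuid set to null), consumers must tolerate a missing key. A small sketch of such a consumer; the function name is illustrative:

    def authorized_keys_lines(logins):
        # One line per (user, key) pair; rows without a key still name a
        # valid login but contribute nothing to an authorized_keys file.
        for login in logins:
            if login.get('public_key'):
                yield '%s %s@%s' % (login['public_key'],
                                    login['username'], login['hostname'])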
index d5b2f871cc89a138a894753deaf9b026b0bc8443..35dd1a94c9d983b343fc6394370f03ca795ca896 100644 (file)
@@ -23,6 +23,7 @@ class ArvadosModel < ActiveRecord::Base
   after_destroy :log_destroy
   after_find :convert_serialized_symbols_to_strings
   before_validation :normalize_collection_uuids
+  before_validation :set_default_owner
   validate :ensure_serialized_attribute_type
   validate :ensure_valid_uuids
 
@@ -276,12 +277,14 @@ class ArvadosModel < ActiveRecord::Base
     true
   end
 
-  def ensure_owner_uuid_is_permitted
-    raise PermissionDeniedError if !current_user
-
-    if new_record? and respond_to? :owner_uuid=
+  def set_default_owner
+    if new_record? and current_user and respond_to? :owner_uuid=
       self.owner_uuid ||= current_user.uuid
     end
+  end
+
+  def ensure_owner_uuid_is_permitted
+    raise PermissionDeniedError if !current_user
 
     if self.owner_uuid.nil?
       errors.add :owner_uuid, "cannot be nil"
index b156a1d0f697440ae0912ff352049bdf0de28c2a..452cd6967bfa7c40d22f8746cc69c2ea10c7fc89 100644 (file)
@@ -33,14 +33,14 @@ class AuthorizedKey < ArvadosModel
 
   def public_key_must_be_unique
     if self.public_key
-      #key = /^ssh-(rsa|dss) [A-Za-z0-9+\/=\+]+\b/.match(self.public_key)
       valid_key = SSHKey.valid_ssh_public_key? self.public_key
 
       if not valid_key
         errors.add(:public_key, "does not appear to be a valid ssh-rsa or dsa public key")
       else
         # Valid if no other rows have this public key
-        if self.class.where('public_key like ?', "%#{self.public_key}%").any?
+        if self.class.where('uuid != ? and public_key like ?',
+                            uuid || '', "%#{self.public_key}%").any?
           errors.add(:public_key, "already exists in the database, use a different key.")
           return false
         end
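The fix above stops a record from colliding with itself on update by excluding its own uuid from the uniqueness query. The rule, restated as a hedged Python sketch over an in-memory row list:

    def public_key_is_unique(rows, candidate_uuid, public_key):
        # A key collides only if some *other* row already holds it.
        return not any(row['public_key'] == public_key
                       and row['uuid'] != candidate_uuid
                       for row in rows)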
index a6b085722e90fb043a2277f7781727218e8e2559..f74e2fedc7396335e6ff537bc2e882aa5da3e177 100644 (file)
@@ -58,12 +58,22 @@ class Commit < ActiveRecord::Base
 
     # Get the commit hash for the upper bound
     max_hash = nil
-    IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
+    git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --"
+    IO.foreach("|#{git_max_hash_cmd}") do |line|
       max_hash = line.strip
     end
 
-    # If not found or string is invalid, nothing else to do
-    return [] if !max_hash or !git_check_ref_format(max_hash)
+    # If not found, nothing else to do
+    if !max_hash
+      logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output"
+      return []
+    end
+
+    # If string is invalid, nothing else to do
+    if !git_check_ref_format(max_hash)
+      logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}"
+      return []
+    end
 
     resolved_exclude = nil
     if exclude
@@ -83,12 +93,22 @@ class Commit < ActiveRecord::Base
     if minimum
       # Get the commit hash for the lower bound
       min_hash = nil
-      IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
+      git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --"
+      IO.foreach("|#{git_min_hash_cmd}") do |line|
         min_hash = line.strip
       end
 
-      # If not found or string is invalid, nothing else to do
-      return [] if !min_hash or !git_check_ref_format(min_hash)
+      # If not found, nothing else to do
+      if !min_hash
+        logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output"
+        return []
+      end
+
+      # If string is invalid, nothing else to do
+      if !git_check_ref_format(min_hash)
+        logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}"
+        return []
+      end
 
       # Now find all commits between them
       IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
index 409dea63bb49c08e458f197f89bfbbeb68ee1231..bec6c211a31e7f1ef0fc92765fd7aecd53cd5855 100644 (file)
@@ -1,80 +1,39 @@
 # Do not use this file for site configuration. Create application.yml
 # instead (see application.yml.example).
-
-development:
-  force_ssl: false
-  cache_classes: false
-  whiny_nils: true
-  consider_all_requests_local: true
-  action_controller.perform_caching: false
-  action_mailer.raise_delivery_errors: false
-  action_mailer.perform_deliveries: false
-  active_support.deprecation: :log
-  action_dispatch.best_standards_support: :builtin
-  active_record.mass_assignment_sanitizer: :strict
-  active_record.auto_explain_threshold_in_seconds: 0.5
-  assets.compress: false
-  assets.debug: true
-  local_modified: "<%= '-modified' if `git status -s` != '' %>"
-
-production:
-  force_ssl: true
-  cache_classes: true
-  consider_all_requests_local: false
-  action_controller.perform_caching: true
-  serve_static_assets: false
-  assets.compress: true
-  assets.compile: false
-  assets.digest: true
-
-test:
-  force_ssl: false
-  cache_classes: true
-  serve_static_assets: true
-  static_cache_control: public, max-age=3600
-  whiny_nils: true
-  consider_all_requests_local: true
-  action_controller.perform_caching: false
-  action_dispatch.show_exceptions: false
-  action_controller.allow_forgery_protection: false
-  action_mailer.delivery_method: :test
-  active_support.deprecation: :stderr
-  active_record.mass_assignment_sanitizer: :strict
-  uuid_prefix: zzzzz
-  sso_app_id: arvados-server
-  sso_app_secret: <%= rand(2**512).to_s(36) %>
-  sso_provider_url: http://localhost:3002
-  secret_token: <%= rand(2**512).to_s(36) %>
-  blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
-  user_profile_notification_address: arvados@example.com
-  workbench_address: https://localhost:3001/
-  git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
-  git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
+#
+# The order of precedence is:
+# 1. config/environments/{RAILS_ENV}.rb (deprecated)
+# 2. Section in application.yml corresponding to RAILS_ENV (e.g., development)
+# 3. Section in application.yml called "common"
+# 4. Section in application.default.yml corresponding to RAILS_ENV
+# 5. Section in application.default.yml called "common"
 
 common:
+  ###
+  ### Essential site configuration
+  ###
+
   # The prefix used for all database identifiers to identify the record as
   # originating from this site.  Must be exactly 5 alphanumeric characters
   # (lowercase ASCII letters and digits).
-  uuid_prefix: <%= Digest::MD5.hexdigest(`hostname`).to_i(16).to_s(36)[0..4] %>
+  uuid_prefix: ~
 
-  # If not false, this is the hostname that will be used for root_url and
-  # advertised in the discovery document.  By default, use the default Rails
-  # logic for deciding on a hostname.
-  host: false
+  # secret_token is a string of alphanumeric characters used by Rails
+  # to sign session tokens. IMPORTANT: This is a site secret. It
+  # should be at least 50 characters.
+  secret_token: ~
 
-  # Base part of SSH git clone url given with repository resources. If
-  # true, the default "git@git.(uuid_prefix).arvadosapi.com:" is
-  # used. If false, SSH clone URLs are not advertised. Include a
-  # trailing ":" or "/" if needed: it will not be added automatically.
-  git_repo_ssh_base: true
+  # blob_signing_key is a string of alphanumeric characters used to
+  # generate permission signatures for Keep locators. It must be
+  # identical to the permission key given to Keep. IMPORTANT: This is
+  # a site secret. It should be at least 50 characters.
+  blob_signing_key: ~
 
-  # Base part of HTTPS git clone urls given with repository
-  # resources. This is expected to be an arv-git-httpd service which
-  # accepts API tokens as HTTP-auth passwords. If true, the default
-  # "https://git.(uuid_prefix).arvadosapi.com/" is used. If false,
-  # HTTPS clone URLs are not advertised. Include a trailing ":" or "/"
-  # if needed: it will not be added automatically.
-  git_repo_https_base: true
+  # These settings are provided by your OAuth2 provider (e.g.,
+  # sso-provider).
+  sso_app_secret: ~
+  sso_app_id: ~
+  sso_provider_url: ~
 
   # If this is not false, HTML requests at the API server's root URL
   # are redirected to this location, and it is provided in the text of
@@ -82,6 +41,30 @@ common:
   # to log in.
   workbench_address: false
 
+  # The ARVADOS_WEBSOCKETS environment variable determines whether to
+  # serve http, websockets, or both.
+  #
+  # If ARVADOS_WEBSOCKETS="true", http and websockets are both served
+  # from the same process.
+  #
+  # If ARVADOS_WEBSOCKETS="ws-only", only websockets is served.
+  #
+  # If ARVADOS_WEBSOCKETS="false" or not set at all, only http is
+  # served. In this case, you should have a separate process serving
+  # websockets, and the address of that service should be given here
+  # as websocket_address.
+  #
+  # If websocket_address is false (which is the default), the
+  # discovery document will tell clients to use the current server as
+  # the websocket service, or (if the current server does not have
+  # websockets enabled) not to use websockets at all.
+  #
+  # Example: Clients will connect to the specified endpoint.
+  #websocket_address: wss://127.0.0.1:3333/websocket
+  # Default: Clients will connect to this server if it's running
+  # websockets, otherwise none at all.
+  websocket_address: false
+
   # Git repositories must be readable by api server, or you won't be
   # able to submit crunch jobs. To pass the test suites, put a clone
   # of the arvados tree in {git_repositories_dir}/arvados.git or
@@ -94,93 +77,54 @@ common:
   # subdirectory of {git_repositories_dir}.
   git_internal_dir: /var/lib/arvados/internal.git
 
-  # :none or :slurm_immediate
-  crunch_job_wrapper: :none
-
-  # username, or false = do not set uid when running jobs.
-  crunch_job_user: crunch
-
-  # The web service must be able to create/write this file, and
-  # crunch-job must be able to stat() it.
-  crunch_refresh_trigger: /tmp/crunch_refresh_trigger
-
-  # These two settings control how frequently log events are flushed to the
-  # database.  Log lines are buffered until either crunch_log_bytes_per_event
-  # has been reached or crunch_log_seconds_between_events has elapsed since
-  # the last flush.
-  crunch_log_bytes_per_event: 4096
-  crunch_log_seconds_between_events: 1
-
-  # The sample period for throttling logs, in seconds.
-  crunch_log_throttle_period: 60
-
-  # Maximum number of bytes that job can log over crunch_log_throttle_period
-  # before being silenced until the end of the period.
-  crunch_log_throttle_bytes: 65536
-
-  # Maximum number of lines that job can log over crunch_log_throttle_period
-  # before being silenced until the end of the period.
-  crunch_log_throttle_lines: 1024
-
-  # Maximum bytes that may be logged by a single job.  Log bytes that are
-  # silenced by throttling are not counted against this total.
-  crunch_limit_log_bytes_per_job: 67108864
+  # Default replication level for collections. This is used when a
+  # collection's replication_desired attribute is nil.
+  default_collection_replication: 2
 
-  # Path to dns server configuration directory
-  # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
-  # files or touch restart.txt (see below).
-  dns_server_conf_dir: false
 
-  # Template file for the dns server host snippets. See
-  # unbound.template in this directory for an example. If false, do
-  # not write any config files.
-  dns_server_conf_template: false
+  ###
+  ### Overriding default advertised hostnames/URLs
+  ###
 
-  # String to write to {dns_server_conf_dir}/restart.txt (with a
-  # trailing newline) after updating local data. If false, do not
-  # open or write the restart.txt file.
-  dns_server_reload_command: false
+  # If not false, this is the hostname that will be used for root_url and
+  # advertised in the discovery document.  By default, use the default Rails
+  # logic for deciding on a hostname.
+  host: false
 
-  # Command to run after each DNS update. Template variables will be
-  # substituted; see the "unbound" example below. If false, do not run
-  # a command.
-  dns_server_update_command: false
+  # Base part of SSH git clone url given with repository resources. If
+  # true, the default "git@git.(uuid_prefix).arvadosapi.com:" is
+  # used. If false, SSH clone URLs are not advertised. Include a
+  # trailing ":" or "/" if needed: it will not be added automatically.
+  git_repo_ssh_base: true
 
-  ## Example for unbound:
-  #dns_server_conf_dir: /etc/unbound/conf.d
-  #dns_server_conf_template: /path/to/your/api/server/config/unbound.template
-  ## ...plus one of the following two methods of reloading:
-  #dns_server_reload_command: unbound-control reload
-  #dns_server_update_command: echo %{hostname} %{hostname}.%{uuid_prefix} %{hostname}.%{uuid_prefix}.arvadosapi.com %{ptr_domain} | xargs -n 1 unbound-control local_data_remove && unbound-control local_data %{hostname} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix}.arvadosapi.com IN A %{ip_address} && unbound-control local_data %{ptr_domain}. IN PTR %{hostname}.%{uuid_prefix}.arvadosapi.com
+  # Base part of HTTPS git clone urls given with repository
+  # resources. This is expected to be an arv-git-httpd service which
+  # accepts API tokens as HTTP-auth passwords. If true, the default
+  # "https://git.(uuid_prefix).arvadosapi.com/" is used. If false,
+  # HTTPS clone URLs are not advertised. Include a trailing ":" or "/"
+  # if needed: it will not be added automatically.
+  git_repo_https_base: true
 
-  compute_node_domain: false
-  compute_node_nameservers:
-    - 192.168.1.1
 
-  # The version below is suitable for AWS.
-  # To use it, copy it to your application.yml, uncomment, and change <%# to <%=
-  # compute_node_nameservers: <%#
-  #   require 'net/http'
-  #   ['local', 'public'].collect do |iface|
-  #     Net::HTTP.get(URI("http://169.254.169.254/latest/meta-data/#{iface}-ipv4")).match(/^[\d\.]+$/)[0]
-  #   end << '172.16.0.23'
-  # %>
+  ###
+  ### New user and & email settings
+  ###
 
-  accept_api_token: {}
+  # Config parameters to automatically setup new users.
+  # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
+  # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+  auto_setup_new_users: false
+  auto_setup_new_users_with_vm_uuid: false
+  auto_setup_new_users_with_repository: false
+  auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
 
   # When new_users_are_active is set to true, the user agreement check is skipped.
   new_users_are_active: false
 
-  admin_notifier_email_from: arvados@example.com
-  email_subject_prefix: "[ARVADOS] "
-  user_notifier_email_from: arvados@example.com
-  new_user_notification_recipients: [ ]
-  new_inactive_user_notification_recipients: [ ]
-
   # The e-mail address of the user you would like to become marked as an admin
   # user on their first login.
   # In the default configuration, authentication happens through the Arvados SSO
-  # server, which uses openid against Google's servers, so in that case this
+  # server, which uses OAuth2 against Google's servers, so in that case this
   # should be an address associated with a Google account.
   auto_admin_user: false
 
@@ -188,51 +132,20 @@ common:
   # other admin users exist will automatically become an admin user.
   auto_admin_first_user: false
 
-  ## Set Time.zone default to the specified zone and make Active
-  ## Record auto-convert to this zone.  Run "rake -D time" for a list
-  ## of tasks for finding time zone names. Default is UTC.
-  #time_zone: Central Time (US & Canada)
-
-  ## Default encoding used in templates for Ruby 1.9.
-  encoding: utf-8
-
-  # Enable the asset pipeline
-  assets.enabled: true
-
-  # Version of your assets, change this if you want to expire all your assets
-  assets.version: "1.0"
+  # Email address to notify whenever a user creates a profile for the
+  # first time
+  user_profile_notification_address: false
 
-  arvados_theme: default
+  admin_notifier_email_from: arvados@example.com
+  email_subject_prefix: "[ARVADOS] "
+  user_notifier_email_from: arvados@example.com
+  new_user_notification_recipients: [ ]
+  new_inactive_user_notification_recipients: [ ]
 
-  # The ARVADOS_WEBSOCKETS environment variable determines whether to
-  # serve http, websockets, or both.
-  #
-  # If ARVADOS_WEBSOCKETS="true", http and websockets are both served
-  # from the same process.
-  #
-  # If ARVADOS_WEBSOCKETS="ws-only", only websockets is served.
-  #
-  # If ARVADOS_WEBSOCKETS="false" or not set at all, only http is
-  # served. In this case, you should have a separate process serving
-  # websockets, and the address of that service should be given here
-  # as websocket_address.
-  #
-  # If websocket_address is false (which is the default), the
-  # discovery document will tell clients to use the current server as
-  # the websocket service, or (if the current server does not have
-  # websockets enabled) not to use websockets at all.
-  #
-  # Example: Clients will connect to the specified endpoint.
-  #websocket_address: wss://127.0.0.1:3333/websocket
-  # Default: Clients will connect to this server if it's running
-  # websockets, otherwise none at all.
-  websocket_address: false
 
-  # blob_signing_key is a string of alphanumeric characters used to
-  # generate permission signatures for Keep locators. It must be
-  # identical to the permission key given to Keep. IMPORTANT: This is
-  # a site secret. It should be at least 50 characters.
-  blob_signing_key: ~
+  ###
+  ### Limits, timeouts and durations
+  ###
 
   # Lifetime (in seconds) of blob permission signatures generated by
   # the API server. This determines how long a client can take (after
@@ -252,58 +165,9 @@ common:
   # The default is 2 weeks.
   blob_signature_ttl: 1209600
 
-  # Allow clients to create collections by providing a manifest with
-  # unsigned data blob locators. IMPORTANT: This effectively disables
-  # access controls for data stored in Keep: a client who knows a hash
-  # can write a manifest that references the hash, pass it to
-  # collections.create (which will create a permission link), use
-  # collections.get to obtain a signature for that data locator, and
-  # use that signed locator to retrieve the data from Keep. Therefore,
-  # do not turn this on if your users expect to keep data private from
-  # one another!
-  permit_create_collection_with_unsigned_manifest: false
-
-  # secret_token is a string of alphanumeric characters used by Rails
-  # to sign session tokens. IMPORTANT: This is a site secret. It
-  # should be at least 50 characters.
-  secret_token: ~
-
-  # Email address to notify whenever a user creates a profile for the
-  # first time
-  user_profile_notification_address: false
-
-  default_openid_prefix: https://www.google.com/accounts/o8/id
-
-  # Config parameters to automatically setup new users.
-  # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
-  # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
-  auto_setup_new_users: false
-  auto_setup_new_users_with_vm_uuid: false
-  auto_setup_new_users_with_repository: false
-  auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
-
-  # source_version
-  source_version: "<%= `git log -n 1 --format=%h`.strip %>"
-  local_modified: false
-
   # Default lifetime for ephemeral collections: 2 weeks.
   default_trash_lifetime: 1209600
 
-  # Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
-  # On (sso) server.  Should only be enabled during development when the SSO
-  # server is using a self-signed cert.
-  sso_insecure: false
-
-  # These settings are provided by your OAuth2 provider (e.g.,
-  # sso-provider).
-  sso_app_id: ~
-  sso_app_secret: ~
-  sso_provider_url: ~
-
-  # Default replication level for collections. This is used when a
-  # collection's replication_desired attribute is nil.
-  default_collection_replication: 2
-
   # Maximum size (in bytes) allowed for a single API request.  This
   # limit is published in the discovery document for use by clients.
   # Note: You must separately configure the upstream web server or
@@ -336,9 +200,77 @@ common:
   # go down.
   max_compute_nodes: 64
 
+  # These two settings control how frequently log events are flushed to the
+  # database.  Log lines are buffered until either crunch_log_bytes_per_event
+  # has been reached or crunch_log_seconds_between_events has elapsed since
+  # the last flush.
+  crunch_log_bytes_per_event: 4096
+  crunch_log_seconds_between_events: 1
+
+  # The sample period for throttling logs, in seconds.
+  crunch_log_throttle_period: 60
+
+  # Maximum number of bytes that job can log over crunch_log_throttle_period
+  # before being silenced until the end of the period.
+  crunch_log_throttle_bytes: 65536
+
+  # Maximum number of lines that job can log over crunch_log_throttle_period
+  # before being silenced until the end of the period.
+  crunch_log_throttle_lines: 1024
+
+  # Maximum bytes that may be logged by a single job.  Log bytes that are
+  # silenced by throttling are not counted against this total.
+  crunch_limit_log_bytes_per_job: 67108864
+
+
+  ###
+  ### Crunch, DNS & compute node management
+  ###
+
   # Docker image to be used when none found in runtime_constraints of a job
   default_docker_image_for_jobs: false
 
+  # :none or :slurm_immediate
+  crunch_job_wrapper: :none
+
+  # username, or false = do not set uid when running jobs.
+  crunch_job_user: crunch
+
+  # The web service must be able to create/write this file, and
+  # crunch-job must be able to stat() it.
+  crunch_refresh_trigger: /tmp/crunch_refresh_trigger
+
+  # Path to dns server configuration directory
+  # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
+  # files or touch restart.txt (see below).
+  dns_server_conf_dir: false
+
+  # Template file for the dns server host snippets. See
+  # unbound.template in this directory for an example. If false, do
+  # not write any config files.
+  dns_server_conf_template: false
+
+  # String to write to {dns_server_conf_dir}/restart.txt (with a
+  # trailing newline) after updating local data. If false, do not
+  # open or write the restart.txt file.
+  dns_server_reload_command: false
+
+  # Command to run after each DNS update. Template variables will be
+  # substituted; see the "unbound" example below. If false, do not run
+  # a command.
+  dns_server_update_command: false
+
+  ## Example for unbound:
+  #dns_server_conf_dir: /etc/unbound/conf.d
+  #dns_server_conf_template: /path/to/your/api/server/config/unbound.template
+  ## ...plus one of the following two methods of reloading:
+  #dns_server_reload_command: unbound-control reload
+  #dns_server_update_command: echo %{hostname} %{hostname}.%{uuid_prefix} %{hostname}.%{uuid_prefix}.arvadosapi.com %{ptr_domain} | xargs -n 1 unbound-control local_data_remove && unbound-control local_data %{hostname} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix}.arvadosapi.com IN A %{ip_address} && unbound-control local_data %{ptr_domain}. IN PTR %{hostname}.%{uuid_prefix}.arvadosapi.com
+
+  compute_node_domain: false
+  compute_node_nameservers:
+    - 192.168.1.1
+
   # Hostname to assign to a compute node when it sends a "ping" and the
   # hostname in its Node record is nil.
   # During bootstrapping, the "ping" script is expected to notice the
@@ -354,3 +286,97 @@ common:
   # assign_node_hostname: compute%<slot_number>04d
   # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
   assign_node_hostname: compute%<slot_number>d
+
+
+  ###
+  ### Remaining assorted configuration options.
+  ###
+
+  arvados_theme: default
+
+  # Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
+  # On (sso) server.  Should only be enabled during development when the SSO
+  # server is using a self-signed cert.
+  sso_insecure: false
+
+  ## Set Time.zone default to the specified zone and make Active
+  ## Record auto-convert to this zone.  Run "rake -D time" for a list
+  ## of tasks for finding time zone names. Default is UTC.
+  #time_zone: Central Time (US & Canada)
+
+  ## Default encoding used in templates for Ruby 1.9.
+  encoding: utf-8
+
+  # Enable the asset pipeline
+  assets.enabled: true
+
+  # Version of your assets, change this if you want to expire all your assets
+  assets.version: "1.0"
+
+  # Allow clients to create collections by providing a manifest with
+  # unsigned data blob locators. IMPORTANT: This effectively disables
+  # access controls for data stored in Keep: a client who knows a hash
+  # can write a manifest that references the hash, pass it to
+  # collections.create (which will create a permission link), use
+  # collections.get to obtain a signature for that data locator, and
+  # use that signed locator to retrieve the data from Keep. Therefore,
+  # do not turn this on if your users expect to keep data private from
+  # one another!
+  permit_create_collection_with_unsigned_manifest: false
+
+  default_openid_prefix: https://www.google.com/accounts/o8/id
+
+  # source_version
+  source_version: "<%= `git log -n 1 --format=%h`.strip %>"
+  local_modified: false
+
+
+development:
+  force_ssl: false
+  cache_classes: false
+  whiny_nils: true
+  consider_all_requests_local: true
+  action_controller.perform_caching: false
+  action_mailer.raise_delivery_errors: false
+  action_mailer.perform_deliveries: false
+  active_support.deprecation: :log
+  action_dispatch.best_standards_support: :builtin
+  active_record.mass_assignment_sanitizer: :strict
+  active_record.auto_explain_threshold_in_seconds: 0.5
+  assets.compress: false
+  assets.debug: true
+  local_modified: "<%= '-modified' if `git status -s` != '' %>"
+
+production:
+  force_ssl: true
+  cache_classes: true
+  consider_all_requests_local: false
+  action_controller.perform_caching: true
+  serve_static_assets: false
+  assets.compress: true
+  assets.compile: false
+  assets.digest: true
+
+test:
+  force_ssl: false
+  cache_classes: true
+  serve_static_assets: true
+  static_cache_control: public, max-age=3600
+  whiny_nils: true
+  consider_all_requests_local: true
+  action_controller.perform_caching: false
+  action_dispatch.show_exceptions: false
+  action_controller.allow_forgery_protection: false
+  action_mailer.delivery_method: :test
+  active_support.deprecation: :stderr
+  active_record.mass_assignment_sanitizer: :strict
+  uuid_prefix: zzzzz
+  sso_app_id: arvados-server
+  sso_app_secret: <%= rand(2**512).to_s(36) %>
+  sso_provider_url: http://localhost:3002
+  secret_token: <%= rand(2**512).to_s(36) %>
+  blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
+  user_profile_notification_address: arvados@example.com
+  workbench_address: https://localhost:3001/
+  git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
+  git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
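A minimal sketch of the five-layer precedence described in the header comment of this file, assuming each layer has been parsed into a flat dict; lower-numbered layers win:

    def effective_config(layers):
        # layers[0] has the highest precedence; apply lowest first so
        # later updates overwrite earlier ones.
        merged = {}
        for layer in reversed(layers):
            merged.update(layer)
        return merged

    layers = [
        {},                                    # 1. environments/{RAILS_ENV}.rb
        {'uuid_prefix': 'zzzzz'},              # 2. application.yml, RAILS_ENV section
        {'workbench_address': False},          # 3. application.yml, "common"
        {},                                    # 4. application.default.yml, RAILS_ENV
        {'uuid_prefix': None, 'host': False},  # 5. application.default.yml, "common"
    ]
    assert effective_config(layers)['uuid_prefix'] == 'zzzzz'  # layer 2 beats 5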
index ee14c0eeb2742776fd3d204cab078e511d7706f4..149770c9ac38c9e40b311fae2b429eebd09009d1 100644 (file)
 # 4. Section in application.default.yml corresponding to RAILS_ENV
 # 5. Section in application.default.yml called "common"
 
-development:
-  # Mandatory site secrets. See application.default.yml for more info.
+production:
+  # Mandatory site configuration.  See application.default.yml and
+  # http://doc.arvados.org/install/install-api-server.html#configure_application
+  # for more information.
+  uuid_prefix: ~
   secret_token: ~
   blob_signing_key: ~
-  uuid_prefix: bogus
-  workbench_address: https://localhost:3031
+  sso_app_secret: ~
+  sso_app_id: ~
+  sso_provider_url: ~
+  workbench_address: ~
+  websocket_address: ~
+  #git_repositories_dir: ~
+  #git_internal_dir: ~
 
-production:
-  # Mandatory site secrets. See application.default.yml for more info.
+development:
+  # Separate settings for development configuration.
+  uuid_prefix: ~
   secret_token: ~
   blob_signing_key: ~
-  uuid_prefix: bogus
-  workbench_address: https://workbench.bogus.arvadosapi.com
-  sso_app_id: arvados-server
+  sso_app_id: ~
   sso_app_secret: ~
-  sso_provider_url: https://login.bogus.arvadosapi.com
+  sso_provider_url: ~
+  workbench_address: ~
+  websocket_address: ~
+  #git_repositories_dir: ~
+  #git_internal_dir: ~
 
 test:
   # Tests should be able to run without further configuration, but if you do
diff --git a/services/api/config/initializers/hardcoded_api_tokens.rb.example b/services/api/config/initializers/hardcoded_api_tokens.rb.example
deleted file mode 100644 (file)
index 6339bf6..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-Server::Application.configure do
-  config.accept_api_token = { 'foobar' => true }
-end
index 35671d65b287e495a76b2fc94b47cdf588983350..ac53876122d6b2e74b0d9fed85a56308308465b4 100644 (file)
@@ -116,7 +116,7 @@ class EventBus
 
         # Execute query and actually send the matching log rows
         count = 0
-        limit = 20
+        limit = 10
 
         logs.limit(limit).each do |l|
           ws.send(l.as_api_response.to_json)
@@ -128,7 +128,7 @@ class EventBus
           # Number of rows returned was capped by limit(), we need to schedule
           # another query to get more logs (will start from last_log_id
           # reported by current query)
-          EventMachine::schedule do
+          EventMachine::next_tick do
             push_events ws, nil
           end
         elsif !notify_id.nil? and (ws.last_log_id.nil? or notify_id > ws.last_log_id)
@@ -140,10 +140,15 @@ class EventBus
         # No filters set up, so just record the sequence number
         ws.last_log_id = notify_id
       end
+    rescue ArgumentError => e
+      # There was some kind of user error.
+      Rails.logger.warn "Error publishing event: #{$!}"
+      ws.send ({status: 500, message: $!}.to_json)
+      ws.close
     rescue => e
       Rails.logger.warn "Error publishing event: #{$!}"
       Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
-      ws.send ({status: 500, message: 'error'}.to_json)
+      ws.send ({status: 500, message: $!}.to_json)
       ws.close
       # These exceptions typically indicate serious server trouble:
       # out of memory issues, database connection problems, etc.  Go ahead and
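
Two things change in the EventBus hunk above: the per-query batch shrinks from 20 to 10 rows, and the follow-up query is queued with EventMachine::next_tick, which always defers to the next reactor iteration, instead of EventMachine::schedule, which runs the block immediately when already on the reactor thread. A loose asyncio analogue of the same pacing pattern (all names here are illustrative; none come from the codebase):

    import asyncio

    LIMIT = 10  # mirrors the batch size chosen in the hunk above

    async def push_events(send, rows):
        # Deliver a bounded batch, then yield to the event loop before
        # continuing, so one busy subscriber cannot monopolize the reactor.
        batch, rest = rows[:LIMIT], rows[LIMIT:]
        for row in batch:
            await send(row)
        if rest:
            await asyncio.sleep(0)  # like next_tick: requeue rather than recurse inline
            await push_events(send, rest)
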
index 434f9c768d2dbeb4a0e908fbd6d8de6d6d903f80..925e4661248279b1543052fd8f8cc563e5efc8d8 100644 (file)
@@ -405,6 +405,20 @@ admin_can_write_aproject:
   head_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
   properties: {}
 
+project_viewer_member_of_all_users_group:
+  uuid: zzzzz-o0j2j-cdnq6627g0h0r2x
+  owner_uuid: zzzzz-tpzed-000000000000000
+  created_at: 2015-07-28T21:34:41.361747000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2015-07-28T21:34:41.361747000Z
+  updated_at: 2015-07-28T21:34:41.361747000Z
+  tail_uuid: zzzzz-tpzed-projectviewer1a
+  link_class: permission
+  name: can_read
+  head_uuid: zzzzz-j7d0g-fffffffffffffff
+  properties: {}
+
 project_viewer_can_read_project:
   uuid: zzzzz-o0j2j-projviewerreadp
   owner_uuid: zzzzz-tpzed-000000000000000
index 7ba2183d3e7c62d540ace6721fe75a65efb00809..514bb66bb2b55eaabfffd9e2494c59500c1a58bc 100644 (file)
@@ -42,6 +42,26 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
     end
   end
 
+  test "get_all_permissions takes into account is_active flag" do
+    r = nil
+    act_as_user users(:active) do
+      r = Repository.create! name: 'active/testrepo'
+    end
+    act_as_system_user do
+      u = users(:active)
+      u.is_active = false
+      u.save!
+    end
+    authorize_with :admin
+    get :get_all_permissions
+    assert_response :success
+    json_response['repositories'].each do |r|
+      r['user_permissions'].each do |user_uuid, perms|
+        refute_equal user_uuid, users(:active).uuid
+      end
+    end
+  end
+
   test "get_all_permissions does not give any access to user without permission" do
     viewer_uuid = users(:project_viewer).uuid
     assert_equal(authorized_keys(:project_viewer).authorized_user_uuid,
@@ -88,15 +108,84 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
     end
   end
 
-  test "get_all_permissions lists repos with no authorized keys" do
+  test "get_all_permissions lists all repos regardless of permissions" do
+    act_as_system_user do
+      # Create repos that could potentially be left out of the
+      # permission list by accident.
+
+      # No authorized_key, no username (this can't even be done
+      # without skipping validations)
+      r = Repository.create name: 'root/testrepo'
+      assert r.save validate: false
+
+      r = Repository.create name: 'invalid username / repo name', owner_uuid: users(:inactive).uuid
+      assert r.save validate: false
+    end
+    authorize_with :admin
+    get :get_all_permissions
+    assert_response :success
+    assert_equal(Repository.count, json_response["repositories"].size)
+  end
+
+  test "get_all_permissions lists user permissions for users with no authorized keys" do
     authorize_with :admin
     AuthorizedKey.destroy_all
     get :get_all_permissions
     assert_response :success
     assert_equal(Repository.count, json_response["repositories"].size)
-    assert(json_response["repositories"].any? do |repo|
-             repo["user_permissions"].empty?
-           end, "test is invalid - all repositories have authorized keys")
+    repos_with_perms = []
+    json_response['repositories'].each do |repo|
+      if repo['user_permissions'].any?
+        repos_with_perms << repo['uuid']
+      end
+    end
+    assert_not_empty repos_with_perms, 'permissions are missing'
+  end
+
+  # Ensure get_all_permissions correctly describes what the normal
+  # permission system would do.
+  test "get_all_permissions obeys group permissions" do
+    act_as_user system_user do
+      r = Repository.create!(name: 'admin/groupcanwrite', owner_uuid: users(:admin).uuid)
+      g = Group.create!(group_class: 'group', name: 'repo-writers')
+      u1 = users(:active)
+      u2 = users(:spectator)
+      Link.create!(tail_uuid: g.uuid, head_uuid: r.uuid, link_class: 'permission', name: 'can_manage')
+      Link.create!(tail_uuid: u1.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_write')
+      Link.create!(tail_uuid: u2.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_read')
+
+      r = Repository.create!(name: 'admin/groupreadonly', owner_uuid: users(:admin).uuid)
+      g = Group.create!(group_class: 'group', name: 'repo-readers')
+      u1 = users(:active)
+      u2 = users(:spectator)
+      Link.create!(tail_uuid: g.uuid, head_uuid: r.uuid, link_class: 'permission', name: 'can_read')
+      Link.create!(tail_uuid: u1.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_write')
+      Link.create!(tail_uuid: u2.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_read')
+    end
+    authorize_with :admin
+    get :get_all_permissions
+    assert_response :success
+    json_response['repositories'].each do |repo|
+      repo['user_permissions'].each do |user_uuid, perms|
+        u = User.find_by_uuid(user_uuid)
+        if perms['can_read']
+          assert u.can? read: repo['uuid']
+          assert_match /R/, perms['gitolite_permissions']
+        else
+          refute_match /R/, perms['gitolite_permissions']
+        end
+        if perms['can_write']
+          assert u.can? write: repo['uuid']
+          assert_match /RW/, perms['gitolite_permissions']
+        else
+          refute_match /W/, perms['gitolite_permissions']
+        end
+        if perms['can_manage']
+          assert u.can? manage: repo['uuid']
+          assert_match /RW/, perms['gitolite_permissions']
+        end
+      end
+    end
   end
 
   test "default index includes fetch_url" do
index 8ca2a94c8dccae653c23323dc58d7073eece8254..7c3270c5c5112126b52f90c6fc593da44ef7924e 100644 (file)
@@ -44,4 +44,25 @@ class Arvados::V1::VirtualMachinesControllerTest < ActionController::TestCase
     assert_empty(json_response.
                  select { |login| login["user_uuid"] == spectator_uuid })
   end
+
+  test "logins without ssh keys are listed" do
+    u, vm = nil
+    act_as_system_user do
+      u = create :active_user, first_name: 'Bob', last_name: 'Blogin'
+      vm = VirtualMachine.create! hostname: 'foo.shell'
+      Link.create!(tail_uuid: u.uuid,
+                   head_uuid: vm.uuid,
+                   link_class: 'permission',
+                   name: 'can_login',
+                   properties: {'username' => 'bobblogin'})
+    end
+    authorize_with :admin
+    get :logins, id: vm.uuid
+    assert_response :success
+    assert_equal 1, json_response['items'].length
+    assert_equal nil, json_response['items'][0]['public_key']
+    assert_equal nil, json_response['items'][0]['authorized_key_uuid']
+    assert_equal u.uuid, json_response['items'][0]['user_uuid']
+    assert_equal 'bobblogin', json_response['items'][0]['username']
+  end
 end
index 9179acd6803fa2fcb0384a507103b9bf390a5d15..c4d6d5eb7e6eb54eaba2c830035321656668aa33 100644 (file)
@@ -84,7 +84,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal 200, status
   end
 
-  test "connect, subscribe, get event" do
+  def subscribe_test
     state = 1
     spec = nil
     ev_uuid = nil
@@ -115,6 +115,10 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal spec.uuid, ev_uuid
   end
 
+  test "connect, subscribe, get event" do
+    subscribe_test()
+  end
+
   test "connect, subscribe, get two events" do
     state = 1
     spec = nil
@@ -646,4 +650,45 @@ class WebsocketTest < ActionDispatch::IntegrationTest
   end
 
 
+  test "connect, subscribe with invalid filter" do
+    state = 1
+    human = nil
+    human_ev_uuid = nil
+
+    authorize_with :admin
+
+    ws_helper :admin do |ws|
+      ws.on :open do |event|
+        # test that #6451 is fixed (invalid filter crashes websockets)
+        ws.send ({method: 'subscribe', filters: [['object_blarg', 'is_a', 'arvados#human']]}.to_json)
+      end
+
+      ws.on :message do |event|
+        d = Oj.load event.data
+        case state
+        when 1
+          assert_equal 200, d["status"]
+          Specimen.create
+          human = Human.create
+          state = 2
+        when 2
+          assert_equal 500, d["status"]
+          state = 3
+          ws.close
+        when 3
+          assert false, "Should not get any more events"
+        end
+      end
+
+    end
+
+    assert_equal 3, state
+
+    # Try connecting again, ensure that websockets server is still running and
+    # didn't crash per #6451
+    subscribe_test()
+
+  end
+
+
 end
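
The invalid-filter test drives a three-step state machine: state 1 expects the 200 subscribe acknowledgement, state 2 expects the 500 produced by the bad filter (rather than a crashed server), and state 3 must never see another message. Distilled to just the message handling (transport omitted; a sketch, not the test helper's API):

    def make_handler():
        state = {"n": 1}
        def on_message(d, close):
            if state["n"] == 1:
                assert d["status"] == 200   # subscribe acknowledged
                state["n"] = 2
            elif state["n"] == 2:
                assert d["status"] == 500   # invalid filter reported, not a crash
                state["n"] = 3
                close()
            else:
                raise AssertionError("no further events expected after close")
        return on_message, state
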
index b8d9b6786cc951a7a37f23e2126a39ac70fd2a78..5a661785bd7bef903747b5890bb135b8bacaebf1 100644 (file)
@@ -1,7 +1,47 @@
 require 'test_helper'
 
 class AuthorizedKeyTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  TEST_KEY = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCf5aTI55uyWr44TckP/ELUAyPsdnf5fTZDcSDN4qiMZYAL7TYV2ixwnbPObLObM0GmHSSFLV1KqsuFICUPgkyKoHbAH6XPgmtfOLU60VkGf1v5uxQ/kXCECRCJmPb3K9dIXGEw+1DXPdOV/xG7rJNvo4a9WK9iqqZr8p+VGKM6C017b8BDLk0tuEEjZ5jXcT/ka/hTScxWkKgF6auPOVQ79OA5+0VaYm4uQLzVUdgwVUPWQQecRrtnc08XYM1htpcLDIAbWfUNK7uE6XR3/OhtrJGf05FGbtGguPgi33F9W3Q3yw6saOK5Y3TfLbskgFaEdLgzqK/QSBRk2zBF49Tj test@localhost"
+
+  test 'create and update key' do
+    u1 = users(:active)
+    act_as_user u1 do
+      ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY, authorized_user_uuid: u1.uuid)
+      assert ak.save, ak.errors.full_messages.to_s
+      ak.name = "bar"
+      assert ak.valid?, ak.errors.full_messages.to_s
+      assert ak.save, ak.errors.full_messages.to_s
+    end
+  end
+
+  test 'duplicate key not permitted' do
+    u1 = users(:active)
+    act_as_user u1 do
+      ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY, authorized_user_uuid: u1.uuid)
+      assert ak.save
+    end
+    u2 = users(:spectator)
+    act_as_user u2 do
+      ak2 = AuthorizedKey.new(name: "bar", public_key: TEST_KEY, authorized_user_uuid: u2.uuid)
+      refute ak2.valid?
+      refute ak2.save
+      assert_match /already exists/, ak2.errors.full_messages.to_s
+    end
+  end
+
+  test 'attach key to wrong user account' do
+    act_as_user users(:active) do
+      ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY)
+      ak.authorized_user_uuid = users(:spectator).uuid
+      refute ak.save
+      ak.uuid = nil
+      ak.authorized_user_uuid = users(:admin).uuid
+      refute ak.save
+      ak.uuid = nil
+      ak.authorized_user_uuid = users(:active).uuid
+      assert ak.save, ak.errors.full_messages.to_s
+      ak.authorized_user_uuid = users(:admin).uuid
+      refute ak.save
+    end
+  end
 end
index b24aaa6d3d08c2e470ac3fbb2cb47999e882a53b..49151318a751941742295ad427816414cfe4ad43 100644 (file)
@@ -148,7 +148,9 @@ class InodeCache(object):
         self._total -= obj.cache_size
         del self._entries[obj.cache_priority]
         if obj.cache_uuid:
-            del self._by_uuid[obj.cache_uuid]
+            self._by_uuid[obj.cache_uuid].remove(obj)
+            if not self._by_uuid[obj.cache_uuid]:
+                del self._by_uuid[obj.cache_uuid]
             obj.cache_uuid = None
         if clear:
             _logger.debug("InodeCache cleared %i total now %i", obj.inode, self._total)
@@ -168,9 +170,13 @@ class InodeCache(object):
             self._entries[obj.cache_priority] = obj
             obj.cache_uuid = obj.uuid()
             if obj.cache_uuid:
-                self._by_uuid[obj.cache_uuid] = obj
+                if obj.cache_uuid not in self._by_uuid:
+                    self._by_uuid[obj.cache_uuid] = [obj]
+                else:
+                    if obj not in self._by_uuid[obj.cache_uuid]:
+                        self._by_uuid[obj.cache_uuid].append(obj)
             self._total += obj.objsize()
-            _logger.debug("InodeCache touched %i (size %i) total now %i", obj.inode, obj.objsize(), self._total)
+            _logger.debug("InodeCache touched %i (size %i) (uuid %s) total now %i", obj.inode, obj.objsize(), obj.cache_uuid, self._total)
             self.cap_cache()
         else:
             obj.cache_priority = None
@@ -188,6 +194,11 @@ class InodeCache(object):
     def find(self, uuid):
         return self._by_uuid.get(uuid)
 
+    def clear(self):
+        self._entries.clear()
+        self._by_uuid.clear()
+        self._total = 0
+
 class Inodes(object):
     """Manage the set of inodes.  This is the mapping from a numeric id
     to a concrete File or Directory object"""
@@ -244,6 +255,17 @@ class Inodes(object):
     def invalidate_entry(self, inode, name):
         llfuse.invalidate_entry(inode, name)
 
+    def clear(self):
+        self.inode_cache.clear()
+
+        for k,v in self._entries.items():
+            try:
+                v.finalize()
+            except Exception as e:
+                _logger.exception("Error during finalize of inode %i", k)
+
+        self._entries.clear()
+
 
 def catch_exceptions(orig_func):
     """Catch uncaught exceptions and log them consistently."""
@@ -314,12 +336,7 @@ class Operations(llfuse.Operations):
             self.events.close()
             self.events = None
 
-        for k,v in self.inodes.items():
-            try:
-                v.finalize()
-            except Exception as e:
-                _logger.exception("Error during finalize of inode %i", k)
-        self.inodes = None
+        self.inodes.clear()
 
     def access(self, inode, mode, ctx):
         return True
@@ -333,20 +350,21 @@ class Operations(llfuse.Operations):
     def on_event(self, ev):
         if 'event_type' in ev:
             with llfuse.lock:
-                item = self.inodes.inode_cache.find(ev["object_uuid"])
-                if item is not None:
-                    item.invalidate()
-                    if ev["object_kind"] == "arvados#collection":
-                        new_attr = ev.get("properties") and ev["properties"].get("new_attributes") and ev["properties"]["new_attributes"]
-
-                        # new_attributes.modified_at currently lacks subsecond precision (see #6347) so use event_at which
-                        # should always be the same.
-                        #record_version = (new_attr["modified_at"], new_attr["portable_data_hash"]) if new_attr else None
-                        record_version = (ev["event_at"], new_attr["portable_data_hash"]) if new_attr else None
-
-                        item.update(to_record_version=record_version)
-                    else:
-                        item.update()
+                items = self.inodes.inode_cache.find(ev["object_uuid"])
+                if items is not None:
+                    for item in items:
+                        item.invalidate()
+                        if ev["object_kind"] == "arvados#collection":
+                            new_attr = ev.get("properties") and ev["properties"].get("new_attributes") and ev["properties"]["new_attributes"]
+
+                            # new_attributes.modified_at currently lacks subsecond precision (see #6347) so use event_at which
+                            # should always be the same.
+                            #record_version = (new_attr["modified_at"], new_attr["portable_data_hash"]) if new_attr else None
+                            record_version = (ev["event_at"], new_attr["portable_data_hash"]) if new_attr else None
+
+                            item.update(to_record_version=record_version)
+                        else:
+                            item.update()
 
                 oldowner = ev.get("properties") and ev["properties"].get("old_attributes") and ev["properties"]["old_attributes"].get("owner_uuid")
                 olditemparent = self.inodes.inode_cache.find(oldowner)
index 16b3bb2cdb53c80a40166bea4b6ab4e816435a90..de12fcce1763764e5ef80e6ef2ce62d70178b9d6 100644 (file)
@@ -423,8 +423,8 @@ class CollectionDirectory(CollectionDirectoryBase):
                 return True
             finally:
                 self._updating_lock.release()
-        except arvados.errors.NotFoundError:
-            _logger.exception("arv-mount %s: error", self.collection_locator)
+        except arvados.errors.NotFoundError as e:
+            _logger.error("Error fetching collection '%s': %s", self.collection_locator, e)
         except arvados.errors.ArgumentError as detail:
             _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
             if self.collection_record is not None and "manifest_text" in self.collection_record:
@@ -524,12 +524,17 @@ will appear if it exists.
                     self.inode, self.inodes, self.api, self.num_retries, k))
 
             if e.update():
-                self._entries[k] = e
+                if k not in self._entries:
+                    self._entries[k] = e
+                else:
+                    self.inodes.del_entry(e)
                 return True
             else:
+                self.inodes.del_entry(e)
                 return False
-        except Exception as e:
-            _logger.debug('arv-mount exception keep %s', e)
+        except Exception as ex:
+            _logger.debug('arv-mount exception keep %s', ex)
+            self.inodes.del_entry(e)
             return False
 
     def __getitem__(self, item):
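
The CollectionDirectory.update hunk adds cleanup on every path where a freshly built entry does not end up installed: if the key is already present, or update fails, the new entry is released with del_entry so inode table slots are not leaked. The control flow in isolation (function and parameter names are mine, not the module's):

    def install_entry(entries, inodes, k, e):
        # e is a freshly created directory entry; release it on every
        # path where it does not end up stored under entries[k].
        if e.update():
            if k not in entries:
                entries[k] = e
            else:
                inodes.del_entry(e)  # an entry already exists; drop the new one
            return True
        inodes.del_entry(e)          # update failed; drop the new entry
        return False
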
diff --git a/services/fuse/tests/mount_test_base.py b/services/fuse/tests/mount_test_base.py
new file mode 100644 (file)
index 0000000..3b7cbaa
--- /dev/null
@@ -0,0 +1,72 @@
+import arvados
+import arvados.safeapi
+import arvados_fuse as fuse
+import llfuse
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+import unittest
+import logging
+import multiprocessing
+import run_test_server
+
+logger = logging.getLogger('arvados.arv-mount')
+
+class MountTestBase(unittest.TestCase):
+    def setUp(self, api=None):
+        # The underlying C implementation of open() makes a fstat() syscall
+        # with the GIL still held.  When the GETATTR message comes back to
+        # llfuse (which in these tests is in the same interpreter process) it
+        # can't acquire the GIL, so it can't service the fstat() call, so it
+        # deadlocks.  The workaround is to run some of our test code in a
+        # separate process.  Fortunately the multiprocessing module makes this
+        # relatively easy.
+        self.pool = multiprocessing.Pool(1)
+
+        self.keeptmp = tempfile.mkdtemp()
+        os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
+        self.mounttmp = tempfile.mkdtemp()
+        run_test_server.run()
+        run_test_server.authorize_with("admin")
+        self.api = api if api else arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+
+    def make_mount(self, root_class, **root_kwargs):
+        self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
+        self.operations.inodes.add_entry(root_class(
+            llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
+        llfuse.init(self.operations, self.mounttmp, [])
+        threading.Thread(None, llfuse.main).start()
+        # wait until the driver is finished initializing
+        self.operations.initlock.wait()
+        return self.operations.inodes[llfuse.ROOT_INODE]
+
+    def tearDown(self):
+        self.pool.terminate()
+        self.pool.join()
+        del self.pool
+
+        # llfuse.close is buggy, so use fusermount instead.
+        #llfuse.close(unmount=True)
+
+        count = 0
+        success = 1
+        while count < 9 and success != 0:
+            success = subprocess.call(["fusermount", "-u", self.mounttmp])
+            time.sleep(0.1)
+            count += 1
+
+        self.operations.destroy()
+
+        os.rmdir(self.mounttmp)
+        shutil.rmtree(self.keeptmp)
+        run_test_server.reset()
+
+    def assertDirContents(self, subdir, expect_content):
+        path = self.mounttmp
+        if subdir:
+            path = os.path.join(path, subdir)
+        self.assertEqual(sorted(expect_content), sorted(llfuse.listdir(path)))
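
As the setUp comment explains, test code that touches the mount must run in a separate process so the in-process llfuse thread can service the resulting syscalls. The pattern the rest of these tests use, reduced to a skeleton (the path and the directory check are placeholders):

    import multiprocessing
    import os

    def exercise_mount(mounttmp):
        # Runs in the worker process; syscalls against the FUSE mount are
        # serviced by the llfuse thread back in the parent process.
        return sorted(os.listdir(mounttmp))

    if __name__ == "__main__":
        pool = multiprocessing.Pool(1)
        print(pool.apply(exercise_mount, ("/path/to/mount",)))  # placeholder path
        pool.terminate()
        pool.join()
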
diff --git a/services/fuse/tests/performance/test_collection_performance.py b/services/fuse/tests/performance/test_collection_performance.py
new file mode 100644 (file)
index 0000000..c4eadca
--- /dev/null
@@ -0,0 +1,477 @@
+import arvados
+import arvados_fuse as fuse
+import llfuse
+import logging
+import os
+import sys
+import unittest
+from .. import run_test_server
+from ..mount_test_base import MountTestBase
+
+logger = logging.getLogger('arvados.arv-mount')
+
+from performance_profiler import profiled
+
+def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.createCollectionWithMultipleBlocks()
+
+        @profiled
+        def createCollectionWithMultipleBlocks(self):
+            for i in range(0, streams):
+                os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
+
+                # Create files
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+                        f.write(data)
+
+    Test().runTest()
+
+def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.readContentsFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def readContentsFromCollectionWithMultipleBlocks(self):
+            for i in range(0, streams):
+                d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+                        self.assertEqual(data, f.read())
+
+    Test().runTest()
+
+def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.moveFileFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def moveFileFromCollectionWithMultipleBlocks(self):
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertIn(filename, d1)
+
+            os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertNotIn(filename, d1)
+
+    Test().runTest()
+
+def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.deleteFileFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def deleteFileFromCollectionWithMultipleBlocks(self):
+            os.remove(os.path.join(mounttmp, stream, filename))
+
+    Test().runTest()
+
+# Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
+class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()
+
+    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        streams = 2
+        files_per_stream = 3
+        blocks_per_file = 2
+        bytes_per_block = 2**26
+
+        data = 'x' * blocks_per_file * bytes_per_block
+
+        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        for i in range(0, streams):
+            self.assertIn('./stream' + str(i), collection2["manifest_text"])
+
+        for i in range(0, files_per_stream):
+            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
+
+        # Read file contents
+        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
+
+        # Move file0.txt out of the streams into .
+        for i in range(0, streams):
+            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        for i in range(0, streams):
+            self.assertNotIn('file0.txt', manifest_streams[i+1])
+
+        for i in range(0, streams):
+            for j in range(1, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+        # Delete 'file1.txt' from all the streams
+        for i in range(0, streams):
+            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        self.assertNotIn('file1.txt', collection2['manifest_text'])
+
+        for i in range(0, streams):
+            for j in range(2, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+
+def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.createCollectionWithManyFiles()
+
+        @profiled
+        def createCollectionWithManyFiles(self):
+            for i in range(0, streams):
+                os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
+
+                # Create files
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+                        f.write(data)
+
+    Test().runTest()
+
+def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.readContentsFromCollectionWithManyFiles()
+
+        @profiled
+        def readContentsFromCollectionWithManyFiles(self):
+            for i in range(0, streams):
+                d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+                        self.assertEqual(data, f.read())
+
+    Test().runTest()
+
+def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.moveFileFromCollectionWithManyFiles()
+
+        @profiled
+        def moveFileFromCollectionWithManyFiles(self):
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertIn(filename, d1)
+
+            os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertNotIn(filename, d1)
+
+    Test().runTest()
+
+def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.deleteFileFromCollectionWithManyFiles()
+
+        @profiled
+        def deleteFileFromCollectionWithManyFiles(self):
+            os.remove(os.path.join(mounttmp, stream, filename))
+
+    Test().runTest()
+
+# Create a collection with two streams, each with 200 files
+class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        for i in range(0, streams):
+            self.assertIn('./stream' + str(i), collection2["manifest_text"])
+
+        for i in range(0, files_per_stream):
+            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
+
+        # Read file contents
+        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+
+        # Move file0.txt out of the streams into .
+        for i in range(0, streams):
+            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        for i in range(0, streams):
+            self.assertNotIn('file0.txt', manifest_streams[i+1])
+
+        for i in range(0, streams):
+            for j in range(1, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+        # Delete 'file1.txt' from all the streams
+        for i in range(0, streams):
+            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        self.assertNotIn('file1.txt', collection2['manifest_text'])
+
+        for i in range(0, streams):
+            for j in range(2, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+
+def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_moveFileFromCollection()
+
+        @profiled
+        def magicDirTest_moveFileFromCollection(self):
+            os.rename(os.path.join(mounttmp, collection1, filename), os.path.join(mounttmp, collection2, filename))
+
+    Test().runTest()
+
+def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_removeFileFromCollection()
+
+        @profiled
+        def magicDirTest_removeFileFromCollection(self):
+            os.remove(os.path.join(mounttmp, collection1, filename))
+
+    Test().runTest()
+
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+    @profiled
+    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
+        # Create collection
+        collection = arvados.collection.Collection(api_client=self.api)
+        for j in range(0, files_per_stream):
+            with collection.open("file"+str(j)+".txt", "w") as f:
+                f.write(data)
+        collection.save_new()
+        return collection
+
+    @profiled
+    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
+        mount_ls = os.listdir(os.path.join(self.mounttmp, collection))
+
+        files = {}
+        for j in range(0, files_per_stream):
+            files[os.path.join(self.mounttmp, collection, 'file'+str(j)+'.txt')] = data
+
+        for k, v in files.items():
+            with open(k) as f:  # k is already the absolute path built above
+                self.assertEqual(v, f.read())
+
+    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        collection1 = self.magicDirTest_createCollectionWithManyFiles()
+        # Create collection with multiple files
+        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+        # Mount FuseMagicDir
+        self.make_mount(fuse.MagicDirectory)
+
+        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)
+
+        # Move file0.txt out of the collection2 into collection1
+        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
+              collection1.manifest_locator(), 'stream0', 'file0.txt',))
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        self.assertFalse('file0.txt' in updated_collection['manifest_text'])
+        self.assertTrue('file1.txt' in updated_collection['manifest_text'])
+
+        # Delete file1.txt from collection2
+        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        self.assertFalse('file1.txt' in updated_collection['manifest_text'])
+        self.assertTrue('file2.txt' in updated_collection['manifest_text'])
+
+
+def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_moveAllFilesFromCollection()
+
+        @profiled
+        def magicDirTest_moveAllFilesFromCollection(self):
+            for j in range(0, files_per_stream):
+                os.rename(os.path.join(mounttmp, from_collection, 'file'+str(j)+'.txt'), os.path.join(mounttmp, to_collection, 'file'+str(j)+'.txt'))
+
+    Test().runTest()
+
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
+    def setUp(self):
+        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()
+
+    @profiled
+    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
+            blocks_per_file=0, bytes_per_block=0, data='x'):
+        # Create collection
+        collection = arvados.collection.Collection(api_client=self.api)
+        for j in range(0, files_per_stream):
+            with collection.open("file"+str(j)+".txt", "w") as f:
+                f.write(data)
+        collection.save_new()
+        return collection
+
+    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
+        # Create collection with multiple files
+        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+        # Mount FuseMagicDir
+        self.make_mount(fuse.MagicDirectory)
+
+        # Move all files from collection2 into collection1
+        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
+                  collection1.manifest_locator(), 'stream0', files_per_stream,))
+
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
+        for name in file_names:
+            self.assertFalse(name in updated_collection['manifest_text'])
+
+        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
+        for name in file_names:
+            self.assertTrue(name in updated_collection['manifest_text'])
+
+
+# Move one file at a time from one collection into another
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
+    def setUp(self):
+        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()
+
+    @profiled
+    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
+        # Create collection
+        collection = arvados.collection.Collection(api_client=self.api)
+        for j in range(0, files_per_stream):
+            with collection.open("file"+str(j)+".txt", "w") as f:
+                f.write(data)
+        collection.save_new()
+        return collection
+
+    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
+        for j in range(0, files_per_stream):
+            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
+                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
+
+    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
+        # Create collection with multiple files
+        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+        # Mount FuseMagicDir
+        self.make_mount(fuse.MagicDirectory)
+
+        # Move files from collection2 into collection1, one at a time
+        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)
+
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
+        for name in file_names:
+            self.assertFalse(name in updated_collection['manifest_text'])
+
+        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
+        for name in file_names:
+            self.assertTrue(name in updated_collection['manifest_text'])
+
+class FuseListLargeProjectContents(MountTestBase):
+    @profiled
+    def getProjectWithManyCollections(self):
+        project_contents = llfuse.listdir(self.mounttmp)
+        self.assertEqual(201, len(project_contents))
+        self.assertIn('Collection_1', project_contents)
+        return project_contents
+
+    @profiled
+    def listContentsInProjectWithManyCollections(self, project_contents):
+        project_contents = llfuse.listdir(self.mounttmp)
+        self.assertEqual(201, len(project_contents))
+        self.assertIn('Collection_1', project_contents)
+
+        for collection_name in project_contents:
+            collection_contents = llfuse.listdir(os.path.join(self.mounttmp, collection_name))
+            self.assertIn('baz', collection_contents)
+
+    def test_listLargeProjectContents(self):
+        self.make_mount(fuse.ProjectDirectory,
+                        project_object=run_test_server.fixture('groups')['project_with_201_collections'])
+        project_contents = self.getProjectWithManyCollections()
+        self.listContentsInProjectWithManyCollections(project_contents)
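
These tests import profiled from performance_profiler, a module not shown in this diff. A minimal stand-in with the same shape (assumed behavior: time the wrapped call and log the duration; the real decorator may record more):

    import functools
    import logging
    import time

    def profiled(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            t0 = time.time()
            try:
                return func(*args, **kwargs)
            finally:
                logging.getLogger("profiling").info(
                    "%s took %.3fs", func.__name__, time.time() - t0)
        return wrapper
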
index 215b6c114da2a1cc5f505cc262dfb1527bd0defd..b9309746a50f181803adf9063d998c5f51a0486d 100644 (file)
@@ -15,63 +15,11 @@ import unittest
 import logging
 import multiprocessing
 import run_test_server
+import mock
 
-logger = logging.getLogger('arvados.arv-mount')
+from mount_test_base import MountTestBase
 
-class MountTestBase(unittest.TestCase):
-    def setUp(self):
-        # The underlying C implementation of open() makes a fstat() syscall
-        # with the GIL still held.  When the GETATTR message comes back to
-        # llfuse (which in these tests is in the same interpreter process) it
-        # can't acquire the GIL, so it can't service the fstat() call, so it
-        # deadlocks.  The workaround is to run some of our test code in a
-        # separate process.  Forturnately the multiprocessing module makes this
-        # relatively easy.
-        self.pool = multiprocessing.Pool(1)
-
-        self.keeptmp = tempfile.mkdtemp()
-        os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
-        self.mounttmp = tempfile.mkdtemp()
-        run_test_server.run()
-        run_test_server.authorize_with("admin")
-        self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
-
-    def make_mount(self, root_class, **root_kwargs):
-        self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
-        self.operations.inodes.add_entry(root_class(
-            llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
-        llfuse.init(self.operations, self.mounttmp, [])
-        threading.Thread(None, llfuse.main).start()
-        # wait until the driver is finished initializing
-        self.operations.initlock.wait()
-        return self.operations.inodes[llfuse.ROOT_INODE]
-
-    def tearDown(self):
-        self.pool.terminate()
-        self.pool.join()
-        del self.pool
-
-        # llfuse.close is buggy, so use fusermount instead.
-        #llfuse.close(unmount=True)
-
-        count = 0
-        success = 1
-        while (count < 9 and success != 0):
-          success = subprocess.call(["fusermount", "-u", self.mounttmp])
-          time.sleep(0.1)
-          count += 1
-
-        self.operations.destroy()
-
-        os.rmdir(self.mounttmp)
-        shutil.rmtree(self.keeptmp)
-        run_test_server.reset()
-
-    def assertDirContents(self, subdir, expect_content):
-        path = self.mounttmp
-        if subdir:
-            path = os.path.join(path, subdir)
-        self.assertEqual(sorted(expect_content), sorted(llfuse.listdir(path)))
+logger = logging.getLogger('arvados.arv-mount')
 
 
 class FuseMountTest(MountTestBase):
@@ -163,8 +111,8 @@ class FuseNoAPITest(MountTestBase):
 
 
 class FuseMagicTest(MountTestBase):
-    def setUp(self):
-        super(FuseMagicTest, self).setUp()
+    def setUp(self, api=None):
+        super(FuseMagicTest, self).setUp(api=api)
 
         cw = arvados.CollectionWriter()
 
@@ -172,7 +120,8 @@ class FuseMagicTest(MountTestBase):
         cw.write("data 1")
 
         self.testcollection = cw.finish()
-        self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
+        self.test_manifest = cw.manifest_text()
+        self.api.collections().create(body={"manifest_text":self.test_manifest}).execute()
 
     def runTest(self):
         self.make_mount(fuse.MagicDirectory)
@@ -1062,6 +1011,25 @@ class FuseFsyncTest(FuseMagicTest):
         self.pool.apply(fuseFsyncTestHelper, (self.mounttmp, self.testcollection))
 
 
+class MagicDirApiError(FuseMagicTest):
+    def setUp(self):
+        api = mock.MagicMock()
+        super(MagicDirApiError, self).setUp(api=api)
+        api.collections().get().execute.side_effect = iter([Exception('API fail'), {"manifest_text": self.test_manifest}])
+        api.keep.get.side_effect = Exception('Keep fail')
+
+    def runTest(self):
+        self.make_mount(fuse.MagicDirectory)
+
+        self.operations.inodes.inode_cache.cap = 1
+        self.operations.inodes.inode_cache.min_entries = 2
+
+        with self.assertRaises(OSError):
+            llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
+
+        llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
+
+
 class FuseUnitTest(unittest.TestCase):
     def test_sanitize_filename(self):
         acceptable = [